From b8eb3a7b6d6453075295bdebd952083d182cc2fe Mon Sep 17 00:00:00 2001
From: "blathers-crl[bot]"
<63125349+blathers-crl[bot]@users.noreply.github.com>
Date: Thu, 24 Oct 2024 10:48:09 +0000
Subject: [PATCH] tmp
---
.github/workflows/update_releases.yaml | 1 -
DEPS.bzl | 6 +-
build/bazelutil/distdir_files.bzl | 2 +-
docs/generated/metrics/metrics.html | 13 +-
.../settings/settings-for-tenants.txt | 2 +-
docs/generated/settings/settings.html | 2 +-
go.mod | 2 +-
go.sum | 4 +-
pkg/BUILD.bazel | 3 -
pkg/build/version.txt | 2 +-
pkg/ccl/backupccl/backup_tenant_test.go | 1 +
pkg/ccl/backupccl/backup_test.go | 96 ----
pkg/ccl/backupccl/show_test.go | 23 -
pkg/ccl/crosscluster/BUILD.bazel | 11 +
.../crosscluster_type_resolver.go} | 24 +-
pkg/ccl/crosscluster/logical/BUILD.bazel | 1 -
.../create_logical_replication_stmt.go | 12 +-
.../logical/logical_replication_job.go | 6 +-
.../logical/logical_replication_job_test.go | 139 ++++-
.../crosscluster/logical/lww_row_processor.go | 13 +
.../streamclient/partitioned_stream_client.go | 4 +-
.../logictestccl/testdata/logic_test/generic | 27 -
pkg/cli/testdata/declarative-rules/deprules | 2 +-
.../declarative-rules/invalid_version | 4 +-
pkg/clusterversion/cockroach_versions.go | 2 +-
pkg/cmd/drtprod/configs/drt_chaos.yaml | 4 -
pkg/cmd/drtprod/configs/drt_large.yaml | 4 -
pkg/cmd/drtprod/configs/drt_scale.yaml | 12 +-
.../drtprod/configs/drt_scale_operations.yaml | 2 +-
pkg/cmd/drtprod/scripts/tpcc_init.sh | 32 +-
pkg/cmd/roachtest/operations/add_column.go | 18 -
pkg/cmd/roachtest/operations/add_index.go | 22 +-
pkg/cmd/roachtest/operations/utils.go | 24 -
pkg/cmd/roachtest/tests/activerecord.go | 2 +-
.../tests/admission_control_latency.go | 5 -
pkg/cmd/roachtest/tests/follower_reads.go | 7 +
pkg/cmd/roachtest/tests/pgjdbc_blocklist.go | 1 -
pkg/cmd/roachtest/tests/ruby_pg.go | 2 +-
pkg/kv/kvserver/BUILD.bazel | 3 +-
pkg/kv/kvserver/client_merge_test.go | 40 +-
pkg/kv/kvserver/client_raft_log_queue_test.go | 62 ---
.../kvserver/flow_control_integration_test.go | 65 +++
pkg/kv/kvserver/kvflowcontrol/rac2/metrics.go | 28 +
.../kvflowcontrol/rac2/range_controller.go | 43 +-
.../rac2/range_controller_test.go | 70 ++-
.../kvflowcontrol/rac2/store_stream.go | 4 +-
.../kvflowcontrol/rac2/token_counter.go | 86 +++-
.../kvflowcontrol/rac2/token_counter_test.go | 26 +-
.../kvflowcontrol/replica_rac2/processor.go | 5 +-
.../replica_rac2/processor_test.go | 2 +-
.../kvflowcontrol/replica_rac2/raft_node.go | 9 +
pkg/kv/kvserver/kvserverpb/raft.proto | 16 -
pkg/kv/kvserver/raft.go | 2 +-
pkg/kv/kvserver/rafttrace/BUILD.bazel | 38 --
pkg/kv/kvserver/rafttrace/rafttrace.go | 477 ------------------
pkg/kv/kvserver/rafttrace/rafttrace_test.go | 344 -------------
pkg/kv/kvserver/replica.go | 5 -
.../kvserver/replica_application_decoder.go | 2 +-
pkg/kv/kvserver/replica_application_result.go | 6 +-
.../replica_application_result_test.go | 16 +-
pkg/kv/kvserver/replica_destroy.go | 1 -
pkg/kv/kvserver/replica_init.go | 4 +-
pkg/kv/kvserver/replica_proposal.go | 22 +-
pkg/kv/kvserver/replica_proposal_buf.go | 49 +-
pkg/kv/kvserver/replica_proposal_buf_test.go | 5 +-
pkg/kv/kvserver/replica_raft.go | 44 +-
pkg/kv/kvserver/replica_store_liveness.go | 15 +-
pkg/kv/kvserver/replica_test.go | 10 +-
pkg/kv/kvserver/store.go | 5 -
pkg/kv/kvserver/store_snapshot.go | 9 +-
.../testdata/replica_unavailable_error.txt | 2 +-
pkg/raft/BUILD.bazel | 1 +
pkg/raft/node_test.go | 10 +-
pkg/raft/quorum/joint.go | 14 +
pkg/raft/quorum/quorum_test.go | 19 +
pkg/raft/raft.go | 38 +-
pkg/raft/raft_test.go | 45 +-
pkg/raft/raftpb/raft.go | 6 -
pkg/raft/rawnode_test.go | 31 +-
pkg/raft/status.go | 4 +-
pkg/raft/storage.go | 169 ++++---
pkg/raft/storage_test.go | 101 ++--
pkg/raft/tracker/fortificationtracker.go | 27 +-
pkg/raft/types.go | 13 +-
pkg/raft/util.go | 4 -
.../install/files/cockroachdb-logging.yaml | 6 +-
pkg/server/BUILD.bazel | 4 +
pkg/server/api_v2.go | 2 +
pkg/server/http_metrics.go | 114 +++++
pkg/server/http_metrics_test.go | 262 ++++++++++
pkg/sql/alter_default_privileges.go | 12 +-
pkg/sql/alter_table.go | 13 +-
pkg/sql/catalog/bootstrap/testdata/testdata | 8 +-
.../testdata/bootstrap_system | 2 +-
.../testdata/bootstrap_tenant | 2 +-
.../tabledesc/logical_replication_helpers.go | 94 +++-
pkg/sql/conn_executor.go | 10 +
pkg/sql/conn_executor_exec.go | 2 +-
pkg/sql/exec_log.go | 102 ++--
pkg/sql/exec_util.go | 16 +-
pkg/sql/executor_statement_metrics.go | 6 +-
pkg/sql/importer/BUILD.bazel | 3 +-
pkg/sql/importer/read_import_base.go | 3 +-
pkg/sql/instrumentation.go | 3 +-
pkg/sql/logictest/REPOSITORIES.bzl | 20 +-
.../alter_default_privileges_for_schema | 4 -
.../alter_default_privileges_for_sequence | 3 -
.../alter_default_privileges_for_table | 24 +-
.../alter_default_privileges_for_type | 3 -
.../alter_default_privileges_in_schema | 3 -
.../testdata/logic_test/crdb_internal_catalog | 2 +-
.../testdata/logic_test/reassign_owned_by | 11 +
.../logic_test/show_default_privileges | 1 -
pkg/sql/opt/exec/execbuilder/BUILD.bazel | 1 +
pkg/sql/opt/exec/execbuilder/builder.go | 38 +-
pkg/sql/opt/exec/execbuilder/relational.go | 15 +-
.../exec/execbuilder/testdata/inverted_index | 57 ++-
.../opt/memo/testdata/stats/inverted-array | 8 +-
pkg/sql/opt/memo/testdata/stats/inverted-json | 132 ++++-
pkg/sql/opt/props/histogram.go | 60 ---
pkg/sql/opt/props/histogram_test.go | 49 --
pkg/sql/opt/xform/rules/select.opt | 16 -
pkg/sql/opt/xform/select_funcs.go | 148 +-----
pkg/sql/opt/xform/testdata/rules/select | 471 +----------------
pkg/sql/plan_opt.go | 210 ++++----
pkg/sql/prepared_stmt.go | 17 +-
pkg/sql/reassign_owned_by.go | 36 +-
.../scbuild/internal/scbuildstmt/helpers.go | 22 +-
pkg/sql/sem/tree/schema_helpers.go | 45 +-
pkg/sql/sem/tree/schema_helpers_test.go | 20 +-
pkg/storage/pebble.go | 20 +-
pkg/storage/pebble_key_schema.go | 42 +-
pkg/storage/pebble_key_schema_test.go | 17 +-
pkg/storage/pebble_test.go | 52 +-
.../lint/passes/fmtsafe/functions.go | 2 -
.../lint/passes/redactcheck/redactcheck.go | 12 +-
pkg/testutils/release/cockroach_releases.yaml | 6 +-
.../cluster-ui/src/api/databaseDetailsApi.ts | 4 -
.../src/databasesPage/databasesPage.tsx | 23 -
.../databaseDetailsSpanStats.saga.spec.ts | 1 -
.../db-console/src/util/api.spec.ts | 2 -
.../nodeGraphs/dashboards/overview.tsx | 9 +-
.../containers/nodeGraphs/dashboards/sql.tsx | 9 +-
.../containers/nodeGraphs/summaryBar.tsx | 27 +-
pkg/util/admission/snapshot_queue.go | 20 +-
pkg/util/admission/snapshot_queue_test.go | 5 +-
pkg/util/metric/histogram_buckets.go | 11 +-
pkg/util/metric/metric.go | 81 +++
pkg/util/metric/metric_test.go | 112 ++++
.../metric/testdata/ResponseTime30sBuckets | 27 +
pkg/util/tracing/tracingpb/recorded_span.go | 4 -
pkg/workload/BUILD.bazel | 3 -
pkg/workload/cli/BUILD.bazel | 1 -
pkg/workload/cli/check.go | 23 +-
pkg/workload/datadog.go | 69 ---
.../schemachange/operation_generator.go | 7 +-
pkg/workload/schemachange/schemachange.go | 22 +-
scripts/bump-pebble.sh | 2 +-
158 files changed, 2199 insertions(+), 3028 deletions(-)
rename pkg/{sql/importer/import_type_resolver.go => ccl/crosscluster/crosscluster_type_resolver.go} (80%)
delete mode 100644 pkg/kv/kvserver/rafttrace/BUILD.bazel
delete mode 100644 pkg/kv/kvserver/rafttrace/rafttrace.go
delete mode 100644 pkg/kv/kvserver/rafttrace/rafttrace_test.go
create mode 100644 pkg/server/http_metrics.go
create mode 100644 pkg/server/http_metrics_test.go
create mode 100644 pkg/util/metric/testdata/ResponseTime30sBuckets
delete mode 100644 pkg/workload/datadog.go
diff --git a/.github/workflows/update_releases.yaml b/.github/workflows/update_releases.yaml
index 86427d57d0f7..cb65bf0eab4c 100644
--- a/.github/workflows/update_releases.yaml
+++ b/.github/workflows/update_releases.yaml
@@ -31,7 +31,6 @@ jobs:
- "release-23.2"
- "release-24.1"
- "release-24.2"
- - "release-24.3"
name: Update pkg/testutils/release/cockroach_releases.yaml on ${{ matrix.branch }}
runs-on: ubuntu-latest
steps:
diff --git a/DEPS.bzl b/DEPS.bzl
index 6ab96672742c..5448be130659 100644
--- a/DEPS.bzl
+++ b/DEPS.bzl
@@ -1818,10 +1818,10 @@ def go_deps():
patches = [
"@com_github_cockroachdb_cockroach//build/patches:com_github_cockroachdb_pebble.patch",
],
- sha256 = "8c165990dc3d4d67618b19e45e2c79f5f48ab9df4e19f881ee1cfa82cdd009df",
- strip_prefix = "github.com/cockroachdb/pebble@v0.0.0-20241017195839-1d2e9e829b92",
+ sha256 = "a72c365ccf143d2bdb7c9619bab0a577568bb205b5d298711f32297098747b7c",
+ strip_prefix = "github.com/cockroachdb/pebble@v0.0.0-20241023221932-8bf23da79c5c",
urls = [
- "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/pebble/com_github_cockroachdb_pebble-v0.0.0-20241017195839-1d2e9e829b92.zip",
+ "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/pebble/com_github_cockroachdb_pebble-v0.0.0-20241023221932-8bf23da79c5c.zip",
],
)
go_repository(
diff --git a/build/bazelutil/distdir_files.bzl b/build/bazelutil/distdir_files.bzl
index bf391b59f57f..b02be4e1ab55 100644
--- a/build/bazelutil/distdir_files.bzl
+++ b/build/bazelutil/distdir_files.bzl
@@ -345,7 +345,7 @@ DISTDIR_FILES = {
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/gostdlib/com_github_cockroachdb_gostdlib-v1.19.0.zip": "c4d516bcfe8c07b6fc09b8a9a07a95065b36c2855627cb3514e40c98f872b69e",
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/logtags/com_github_cockroachdb_logtags-v0.0.0-20230118201751-21c54148d20b.zip": "ca7776f47e5fecb4c495490a679036bfc29d95bd7625290cfdb9abb0baf97476",
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/metamorphic/com_github_cockroachdb_metamorphic-v0.0.0-20231108215700-4ba948b56895.zip": "28c8cf42192951b69378cf537be5a9a43f2aeb35542908cc4fe5f689505853ea",
- "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/pebble/com_github_cockroachdb_pebble-v0.0.0-20241017195839-1d2e9e829b92.zip": "8c165990dc3d4d67618b19e45e2c79f5f48ab9df4e19f881ee1cfa82cdd009df",
+ "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/pebble/com_github_cockroachdb_pebble-v0.0.0-20241023221932-8bf23da79c5c.zip": "a72c365ccf143d2bdb7c9619bab0a577568bb205b5d298711f32297098747b7c",
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/redact/com_github_cockroachdb_redact-v1.1.5.zip": "11b30528eb0dafc8bc1a5ba39d81277c257cbe6946a7564402f588357c164560",
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/returncheck/com_github_cockroachdb_returncheck-v0.0.0-20200612231554-92cdbca611dd.zip": "ce92ba4352deec995b1f2eecf16eba7f5d51f5aa245a1c362dfe24c83d31f82b",
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/stress/com_github_cockroachdb_stress-v0.0.0-20220803192808-1806698b1b7b.zip": "3fda531795c600daf25532a4f98be2a1335cd1e5e182c72789bca79f5f69fcc1",
diff --git a/docs/generated/metrics/metrics.html b/docs/generated/metrics/metrics.html
index 4bbbd213e189..4d8f7d88dc8f 100644
--- a/docs/generated/metrics/metrics.html
+++ b/docs/generated/metrics/metrics.html
@@ -1541,6 +1541,7 @@
APPLICATION | schedules.scheduled-sql-stats-compaction-executor.failed | Number of scheduled-sql-stats-compaction-executor jobs failed | Jobs | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
APPLICATION | schedules.scheduled-sql-stats-compaction-executor.started | Number of scheduled-sql-stats-compaction-executor jobs started | Jobs | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
APPLICATION | schedules.scheduled-sql-stats-compaction-executor.succeeded | Number of scheduled-sql-stats-compaction-executor jobs succeeded | Jobs | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
+APPLICATION | server.http.request.duration.nanos | Duration of an HTTP request in nanoseconds. | Duration | HISTOGRAM | NANOSECONDS | AVG | NONE |
APPLICATION | sql.bytesin | Number of SQL bytes received | SQL Bytes | COUNTER | BYTES | AVG | NON_NEGATIVE_DERIVATIVE |
APPLICATION | sql.bytesout | Number of SQL bytes sent | SQL Bytes | COUNTER | BYTES | AVG | NON_NEGATIVE_DERIVATIVE |
APPLICATION | sql.conn.failures | Number of SQL connection failures | Connections | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
@@ -1560,6 +1561,10 @@
APPLICATION | sql.copy.nonatomic.started.count.internal | Number of non-atomic COPY SQL statements started (internal queries) | SQL Internal Statements | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
APPLICATION | sql.copy.started.count | Number of COPY SQL statements started | SQL Statements | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
APPLICATION | sql.copy.started.count.internal | Number of COPY SQL statements started (internal queries) | SQL Internal Statements | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
+APPLICATION | sql.crud_query.count | Number of SQL SELECT, INSERT, UPDATE, DELETE statements successfully executed | SQL Statements | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
+APPLICATION | sql.crud_query.count.internal | Number of SQL SELECT, INSERT, UPDATE, DELETE statements successfully executed (internal queries) | SQL Internal Statements | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
+APPLICATION | sql.crud_query.started.count | Number of SQL SELECT, INSERT, UPDATE, DELETE statements started | SQL Statements | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
+APPLICATION | sql.crud_query.started.count.internal | Number of SQL SELECT, INSERT, UPDATE, DELETE statements started (internal queries) | SQL Internal Statements | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
APPLICATION | sql.ddl.count | Number of SQL DDL statements successfully executed | SQL Statements | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
APPLICATION | sql.ddl.count.internal | Number of SQL DDL statements successfully executed (internal queries) | SQL Internal Statements | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
APPLICATION | sql.ddl.started.count | Number of SQL DDL statements started | SQL Statements | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
@@ -1675,10 +1680,10 @@
APPLICATION | sql.pre_serve.mem.cur | Current memory usage for SQL connections prior to routing the connection to the target SQL server | Memory | GAUGE | BYTES | AVG | NONE |
APPLICATION | sql.pre_serve.mem.max | Memory usage for SQL connections prior to routing the connection to the target SQL server | Memory | HISTOGRAM | BYTES | AVG | NONE |
APPLICATION | sql.pre_serve.new_conns | Number of SQL connections created prior to routing the connection to the target SQL server | Connections | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
-APPLICATION | sql.query.count | Number of SQL queries executed | SQL Statements | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
-APPLICATION | sql.query.count.internal | Number of SQL queries executed (internal queries) | SQL Internal Statements | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
-APPLICATION | sql.query.started.count | Number of SQL queries started | SQL Statements | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
-APPLICATION | sql.query.started.count.internal | Number of SQL queries started (internal queries) | SQL Internal Statements | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
+APPLICATION | sql.query.count | Number of SQL operations started including queries, and transaction control statements | SQL Statements | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
+APPLICATION | sql.query.count.internal | Number of SQL operations started including queries, and transaction control statements (internal queries) | SQL Internal Statements | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
+APPLICATION | sql.query.started.count | Number of SQL operations started including queries, and transaction control statements | SQL Statements | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
+APPLICATION | sql.query.started.count.internal | Number of SQL operations started including queries, and transaction control statements (internal queries) | SQL Internal Statements | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
APPLICATION | sql.restart_savepoint.count | Number of `SAVEPOINT cockroach_restart` statements successfully executed | SQL Statements | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
APPLICATION | sql.restart_savepoint.count.internal | Number of `SAVEPOINT cockroach_restart` statements successfully executed (internal queries) | SQL Internal Statements | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
APPLICATION | sql.restart_savepoint.release.count | Number of `RELEASE SAVEPOINT cockroach_restart` statements successfully executed | SQL Statements | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
diff --git a/docs/generated/settings/settings-for-tenants.txt b/docs/generated/settings/settings-for-tenants.txt
index bbe69ae53f43..7134c0d894a0 100644
--- a/docs/generated/settings/settings-for-tenants.txt
+++ b/docs/generated/settings/settings-for-tenants.txt
@@ -401,4 +401,4 @@ trace.snapshot.rate duration 0s if non-zero, interval at which background trace
trace.span_registry.enabled boolean true if set, ongoing traces can be seen at https://<ui>/#/debug/tracez application
trace.zipkin.collector string the address of a Zipkin instance to receive traces, as <host>:<port>. If no port is specified, 9411 will be used. application
ui.display_timezone enumeration etc/utc the timezone used to format timestamps in the ui [etc/utc = 0, america/new_york = 1] application
-version version 1000024.2-upgrading-to-1000024.3-step-022 set the active cluster version in the format '<major>.<minor>' application
+version version 24.2-upgrading-to-24.3-step-022 set the active cluster version in the format '<major>.<minor>' application
diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html
index 525c0e5e827d..ab5c41a3ada6 100644
--- a/docs/generated/settings/settings.html
+++ b/docs/generated/settings/settings.html
@@ -359,6 +359,6 @@
trace.span_registry.enabled
| boolean | true | if set, ongoing traces can be seen at https://<ui>/#/debug/tracez | Serverless/Dedicated/Self-Hosted |
trace.zipkin.collector
| string |
| the address of a Zipkin instance to receive traces, as <host>:<port>. If no port is specified, 9411 will be used. | Serverless/Dedicated/Self-Hosted |
ui.display_timezone
| enumeration | etc/utc | the timezone used to format timestamps in the ui [etc/utc = 0, america/new_york = 1] | Serverless/Dedicated/Self-Hosted |
-version
| version | 1000024.2-upgrading-to-1000024.3-step-022 | set the active cluster version in the format '<major>.<minor>' | Serverless/Dedicated/Self-Hosted |
+version
| version | 24.2-upgrading-to-24.3-step-022 | set the active cluster version in the format '<major>.<minor>' | Serverless/Dedicated/Self-Hosted |
diff --git a/go.mod b/go.mod
index 40921e4081a8..8226a5e00c85 100644
--- a/go.mod
+++ b/go.mod
@@ -135,7 +135,7 @@ require (
github.com/cockroachdb/go-test-teamcity v0.0.0-20191211140407-cff980ad0a55
github.com/cockroachdb/gostdlib v1.19.0
github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b
- github.com/cockroachdb/pebble v0.0.0-20241017195839-1d2e9e829b92
+ github.com/cockroachdb/pebble v0.0.0-20241023221932-8bf23da79c5c
github.com/cockroachdb/redact v1.1.5
github.com/cockroachdb/returncheck v0.0.0-20200612231554-92cdbca611dd
github.com/cockroachdb/stress v0.0.0-20220803192808-1806698b1b7b
diff --git a/go.sum b/go.sum
index e1a96f46f20d..20658cb6efe5 100644
--- a/go.sum
+++ b/go.sum
@@ -536,8 +536,8 @@ github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b h1:r6VH0faHjZe
github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b/go.mod h1:Vz9DsVWQQhf3vs21MhPMZpMGSht7O/2vFW2xusFUVOs=
github.com/cockroachdb/metamorphic v0.0.0-20231108215700-4ba948b56895 h1:XANOgPYtvELQ/h4IrmPAohXqe2pWA8Bwhejr3VQoZsA=
github.com/cockroachdb/metamorphic v0.0.0-20231108215700-4ba948b56895/go.mod h1:aPd7gM9ov9M8v32Yy5NJrDyOcD8z642dqs+F0CeNXfA=
-github.com/cockroachdb/pebble v0.0.0-20241017195839-1d2e9e829b92 h1:AEWpYdO8k0gpPWZtpP8CyTr901vv7yxKVrzkXz5Vte8=
-github.com/cockroachdb/pebble v0.0.0-20241017195839-1d2e9e829b92/go.mod h1:XmS8uVDd9YFw/1R7J0J/CmTUANwT7iGnBRxH9AyDA90=
+github.com/cockroachdb/pebble v0.0.0-20241023221932-8bf23da79c5c h1:KxaJAPo1rdkJdghI6y4GhHUDNIBMsvTz8fW6nThzWLg=
+github.com/cockroachdb/pebble v0.0.0-20241023221932-8bf23da79c5c/go.mod h1:XmS8uVDd9YFw/1R7J0J/CmTUANwT7iGnBRxH9AyDA90=
github.com/cockroachdb/redact v1.1.3/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg=
github.com/cockroachdb/redact v1.1.5 h1:u1PMllDkdFfPWaNGMyLD1+so+aq3uUItthCFqzwPJ30=
github.com/cockroachdb/redact v1.1.5/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg=
diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel
index f3bc4fc7a43d..bdf953d5406c 100644
--- a/pkg/BUILD.bazel
+++ b/pkg/BUILD.bazel
@@ -269,7 +269,6 @@ ALL_TESTS = [
"//pkg/kv/kvserver/protectedts:protectedts_test",
"//pkg/kv/kvserver/raftentry:raftentry_test",
"//pkg/kv/kvserver/raftlog:raftlog_test",
- "//pkg/kv/kvserver/rafttrace:rafttrace_test",
"//pkg/kv/kvserver/raftutil:raftutil_test",
"//pkg/kv/kvserver/rangefeed:rangefeed_test",
"//pkg/kv/kvserver/rangelog:rangelog_test",
@@ -1514,8 +1513,6 @@ GO_TARGETS = [
"//pkg/kv/kvserver/raftentry:raftentry_test",
"//pkg/kv/kvserver/raftlog:raftlog",
"//pkg/kv/kvserver/raftlog:raftlog_test",
- "//pkg/kv/kvserver/rafttrace:rafttrace",
- "//pkg/kv/kvserver/rafttrace:rafttrace_test",
"//pkg/kv/kvserver/raftutil:raftutil",
"//pkg/kv/kvserver/raftutil:raftutil_test",
"//pkg/kv/kvserver/rangefeed:rangefeed",
diff --git a/pkg/build/version.txt b/pkg/build/version.txt
index a6f0b11222a8..20e1b7af7c9d 100644
--- a/pkg/build/version.txt
+++ b/pkg/build/version.txt
@@ -1 +1 @@
-v24.3.0-alpha.3
+v24.3.0-beta.2
diff --git a/pkg/ccl/backupccl/backup_tenant_test.go b/pkg/ccl/backupccl/backup_tenant_test.go
index 47962069654e..6643e8e58b01 100644
--- a/pkg/ccl/backupccl/backup_tenant_test.go
+++ b/pkg/ccl/backupccl/backup_tenant_test.go
@@ -42,6 +42,7 @@ func TestBackupSharedProcessTenantNodeDown(t *testing.T) {
ctx := context.Background()
skip.UnderRace(t, "multi-node, multi-tenant test too slow under race")
+ skip.UnderDeadlock(t, "too slow under deadlock detector")
params := base.TestClusterArgs{
ServerArgs: base.TestServerArgs{
DefaultTestTenant: base.TestControlsTenantsExplicitly,
diff --git a/pkg/ccl/backupccl/backup_test.go b/pkg/ccl/backupccl/backup_test.go
index 54c006f43ae8..6d85a48b18d9 100644
--- a/pkg/ccl/backupccl/backup_test.go
+++ b/pkg/ccl/backupccl/backup_test.go
@@ -4105,102 +4105,6 @@ func TestBackupRestoreChecksum(t *testing.T) {
sqlDB.ExpectErr(t, "checksum mismatch", `RESTORE data.* FROM $1`, localFoo)
}
-// TestNonLinearChain observes the effect of a non-linear chain of backups, for
-// example if two inc backups run concurrently, where the second starts before
-// the first finishes and thus does not use the first's end time when picking a
-// start time. In such a chain this first backup is made redundant by the second
-// and should be ignored by restore rather than restored.
-func TestNonLinearChain(t *testing.T) {
- defer leaktest.AfterTest(t)()
- defer log.Scope(t).Close(t)
-
- dir, cleanup := testutils.TempDir(t)
- defer cleanup()
-
- tc := testcluster.NewTestCluster(t, 1, base.TestClusterArgs{ServerArgs: base.TestServerArgs{
- DefaultTestTenant: base.TODOTestTenantDisabled, ExternalIODir: dir, Knobs: base.TestingKnobs{
- JobsTestingKnobs: jobs.NewTestingKnobsWithShortIntervals(),
- },
- }})
-
- tc.Start(t)
- defer tc.Stopper().Stop(context.Background())
-
- sqlDB := sqlutils.MakeSQLRunner(tc.Conns[0])
-
- // Make a table with a row in it and make a full backup of it.
- sqlDB.Exec(t, `CREATE TABLE t (a INT PRIMARY KEY)`)
- sqlDB.Exec(t, `INSERT INTO t VALUES (0)`)
- sqlDB.Exec(t, `BACKUP TABLE defaultdb.t INTO $1`, localFoo)
- require.Len(t, sqlDB.QueryStr(t, `SELECT DISTINCT end_time FROM [SHOW BACKUP LATEST IN $1]`, localFoo), 1)
-
- // Write a row and note the time that includes that row.
- var ts1, ts2 string
- sqlDB.Exec(t, `INSERT INTO t VALUES (1)`)
- sqlDB.QueryRow(t, `SELECT cluster_logical_timestamp()`).Scan(&ts1)
-
- // Start *but pause rather than finish* an inc backup to ts1 of our new row.
- var j jobspb.JobID
- sqlDB.Exec(t, `SET CLUSTER SETTING jobs.debug.pausepoints = 'backup.before.flow'`)
- sqlDB.QueryRow(t, fmt.Sprintf(`BACKUP TABLE defaultdb.t INTO LATEST IN $1 AS OF SYSTEM TIME %s WITH DETACHED`, ts1), localFoo).Scan(&j)
- jobutils.WaitForJobToPause(t, sqlDB, j)
- sqlDB.Exec(t, `RESET CLUSTER SETTING jobs.debug.pausepoints`)
-
- // Add another row and record the time that includes it.
- sqlDB.Exec(t, `INSERT INTO t VALUES (2)`)
- sqlDB.QueryRow(t, `SELECT cluster_logical_timestamp()`).Scan(&ts2)
-
- // Run -- and finish -- an inc backup to ts2. Since the first inc has not yet
- // finished, this will find the full as its parent and use its end, rather
- // than the paused inc, as its start time.
- sqlDB.Exec(t, fmt.Sprintf(`BACKUP TABLE defaultdb.t INTO LATEST IN $1 AS OF SYSTEM TIME %s`, ts2), localFoo)
-
- // We should see two end times now in the shown backup -- the full and this
- // (second) inc.
- require.Len(t, sqlDB.QueryStr(t, `SELECT DISTINCT end_time FROM [SHOW BACKUP LATEST IN $1]`, localFoo), 2)
-
- // Now we have a full ending at t0, an incomplete inc from t0 to t1, and a
- // complete inc also from t0 but to t2. We will move `t` out of our way and
- // run a restore of the chain, i.e. to t2 to see what happens, noting how many
- // files we open to do so.
- sqlDB.Exec(t, `DROP TABLE t`)
- openedBefore := tc.Servers[0].MustGetSQLCounter("cloud.readers_opened")
- sqlDB.Exec(t, `RESTORE TABLE defaultdb.t FROM LATEST IN $1`, localFoo)
- sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"0"}, {"1"}, {"2"}})
-
- // Note how many files the restore opened.
- openedA := tc.Servers[0].MustGetSQLCounter("cloud.readers_opened") - openedBefore
-
- // Now let's let the paused backup finish, adding a bonus "spur" to the chain.
- sqlDB.Exec(t, `RESUME JOB $1`, j)
- jobutils.WaitForJobToSucceed(t, sqlDB, j)
-
- // We should see three end times now in the shown backup -- the full, the 2nd
- // inc we saw before, but now also this first inc as well.
- require.Len(t, sqlDB.QueryStr(t, `SELECT DISTINCT end_time FROM [SHOW BACKUP LATEST IN $1]`, localFoo), 3)
-
- // Restore the same thing -- t2 -- we did before but now with the extra inc
- // spur hanging out in the chain. This should produce the same result, and we
- // would like it to only open one extra file to do so -- the manifest that
- // includes the timestamps that then show it is not needed by the restore.
- sqlDB.Exec(t, `DROP TABLE t`)
- sqlDB.Exec(t, `RESTORE TABLE defaultdb.t FROM LATEST IN $1`, localFoo)
- sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"0"}, {"1"}, {"2"}})
- openedB := tc.Servers[0].MustGetSQLCounter("cloud.readers_opened") - openedA - openedBefore
- // TODO(dt): enable this assertion once it holds.
- if false {
- require.Equal(t, openedA+1, openedB)
- } else {
- require.Less(t, openedA+1, openedB)
- }
-
- // Finally, make sure we can restore from the tip of the spur, not just the
- // tip of the chain.
- sqlDB.Exec(t, `DROP TABLE t`)
- sqlDB.Exec(t, fmt.Sprintf(`RESTORE TABLE defaultdb.t FROM LATEST IN $1 AS OF SYSTEM TIME %s`, ts1), localFoo)
- sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"0"}, {"1"}})
-}
-
func TestTimestampMismatch(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
diff --git a/pkg/ccl/backupccl/show_test.go b/pkg/ccl/backupccl/show_test.go
index 30a55bb21364..7331fb351677 100644
--- a/pkg/ccl/backupccl/show_test.go
+++ b/pkg/ccl/backupccl/show_test.go
@@ -698,29 +698,6 @@ func TestShowBackupWithDebugIDs(t *testing.T) {
require.Greater(t, dbID, 0)
require.Greater(t, publicID, 0)
-
- res := sqlDB.QueryStr(t, `
- SELECT database_name, database_id, parent_schema_name, parent_schema_id, object_name, object_id, object_type
- FROM [SHOW BACKUP FROM LATEST IN $1 WITH debug_ids]
- ORDER BY object_id`, full)
-
- dbIDStr := strconv.Itoa(dbID)
- publicIDStr := strconv.Itoa(publicID)
- schemaIDStr := strconv.Itoa(dbID + 5)
-
- expectedObjects := [][]string{
- {"NULL", "NULL", "NULL", "NULL", "data", dbIDStr, "database"},
- {"data", dbIDStr, "NULL", "NULL", "public", strconv.Itoa(dbID + 1), "schema"},
- {"data", dbIDStr, "public", publicIDStr, "bank", strconv.Itoa(dbID + 2), "table"},
- {"data", dbIDStr, "public", publicIDStr, "welcome", strconv.Itoa(dbID + 3), "type"},
- {"data", dbIDStr, "public", publicIDStr, "_welcome", strconv.Itoa(dbID + 4), "type"},
- {"data", dbIDStr, "NULL", "NULL", "sc", schemaIDStr, "schema"},
- {"data", dbIDStr, "sc", schemaIDStr, "t1", strconv.Itoa(dbID + 6), "table"},
- {"data", dbIDStr, "sc", schemaIDStr, "t2", strconv.Itoa(dbID + 7), "table"},
- }
-
- require.Equal(t, expectedObjects, res)
-
}
func TestShowBackupPathIsCollectionRoot(t *testing.T) {
diff --git a/pkg/ccl/crosscluster/BUILD.bazel b/pkg/ccl/crosscluster/BUILD.bazel
index b669e5b4bada..1da9bee6c2c3 100644
--- a/pkg/ccl/crosscluster/BUILD.bazel
+++ b/pkg/ccl/crosscluster/BUILD.bazel
@@ -4,6 +4,7 @@ go_library(
name = "crosscluster",
srcs = [
"addresses.go",
+ "crosscluster_type_resolver.go",
"errors.go",
"event.go",
"settings.go",
@@ -15,5 +16,15 @@ go_library(
"//pkg/repstream/streampb",
"//pkg/roachpb",
"//pkg/settings",
+ "//pkg/sql/catalog",
+ "//pkg/sql/catalog/descpb",
+ "//pkg/sql/catalog/typedesc",
+ "//pkg/sql/pgwire/pgcode",
+ "//pkg/sql/pgwire/pgerror",
+ "//pkg/sql/sem/tree",
+ "//pkg/sql/sqlerrors",
+ "//pkg/sql/types",
+ "@com_github_cockroachdb_errors//:errors",
+ "@com_github_lib_pq//oid",
],
)
diff --git a/pkg/sql/importer/import_type_resolver.go b/pkg/ccl/crosscluster/crosscluster_type_resolver.go
similarity index 80%
rename from pkg/sql/importer/import_type_resolver.go
rename to pkg/ccl/crosscluster/crosscluster_type_resolver.go
index 061bf464c009..840f8618389c 100644
--- a/pkg/sql/importer/import_type_resolver.go
+++ b/pkg/ccl/crosscluster/crosscluster_type_resolver.go
@@ -1,9 +1,9 @@
-// Copyright 2017 The Cockroach Authors.
+// Copyright 2024 The Cockroach Authors.
//
// Use of this software is governed by the CockroachDB Software License
// included in the /LICENSE file.
-package importer
+package crosscluster
import (
"context"
@@ -20,16 +20,18 @@ import (
"github.com/lib/pq/oid"
)
-type ImportTypeResolver struct {
+// CrossClusterTypeResolver is meant to be used to resolve types using type
+// descriptors that originate from a different cluster.
+type CrossClusterTypeResolver struct {
typeIDToDesc map[descpb.ID]*descpb.TypeDescriptor
typeNameToDesc map[string][]*descpb.TypeDescriptor
}
-var _ tree.TypeReferenceResolver = ImportTypeResolver{}
-var _ catalog.TypeDescriptorResolver = ImportTypeResolver{}
+var _ tree.TypeReferenceResolver = CrossClusterTypeResolver{}
+var _ catalog.TypeDescriptorResolver = CrossClusterTypeResolver{}
-func MakeImportTypeResolver(typeDescs []*descpb.TypeDescriptor) ImportTypeResolver {
- itr := ImportTypeResolver{
+func MakeCrossClusterTypeResolver(typeDescs []*descpb.TypeDescriptor) CrossClusterTypeResolver {
+ itr := CrossClusterTypeResolver{
typeIDToDesc: make(map[descpb.ID]*descpb.TypeDescriptor),
typeNameToDesc: make(map[string][]*descpb.TypeDescriptor),
}
@@ -52,7 +54,7 @@ func MakeImportTypeResolver(typeDescs []*descpb.TypeDescriptor) ImportTypeResolv
// Note that if a table happens to have multiple types with the same name (but
// different schemas), this implementation will return a "feature unsupported"
// error.
-func (i ImportTypeResolver) ResolveType(
+func (i CrossClusterTypeResolver) ResolveType(
ctx context.Context, name *tree.UnresolvedObjectName,
) (*types.T, error) {
var descs []*descpb.TypeDescriptor
@@ -75,12 +77,14 @@ func (i ImportTypeResolver) ResolveType(
}
// ResolveTypeByOID implements the tree.TypeReferenceResolver interface.
-func (i ImportTypeResolver) ResolveTypeByOID(ctx context.Context, oid oid.Oid) (*types.T, error) {
+func (i CrossClusterTypeResolver) ResolveTypeByOID(
+ ctx context.Context, oid oid.Oid,
+) (*types.T, error) {
return typedesc.ResolveHydratedTByOID(ctx, oid, i)
}
// GetTypeDescriptor implements the catalog.TypeDescriptorResolver interface.
-func (i ImportTypeResolver) GetTypeDescriptor(
+func (i CrossClusterTypeResolver) GetTypeDescriptor(
_ context.Context, id descpb.ID,
) (tree.TypeName, catalog.TypeDescriptor, error) {
var desc *descpb.TypeDescriptor
diff --git a/pkg/ccl/crosscluster/logical/BUILD.bazel b/pkg/ccl/crosscluster/logical/BUILD.bazel
index 42b6731903a4..e9b64205f66a 100644
--- a/pkg/ccl/crosscluster/logical/BUILD.bazel
+++ b/pkg/ccl/crosscluster/logical/BUILD.bazel
@@ -50,7 +50,6 @@ go_library(
"//pkg/sql/execinfra",
"//pkg/sql/execinfrapb",
"//pkg/sql/exprutil",
- "//pkg/sql/importer",
"//pkg/sql/isql",
"//pkg/sql/lexbase",
"//pkg/sql/parser",
diff --git a/pkg/ccl/crosscluster/logical/create_logical_replication_stmt.go b/pkg/ccl/crosscluster/logical/create_logical_replication_stmt.go
index 81713260cb3c..a98570a46b29 100644
--- a/pkg/ccl/crosscluster/logical/create_logical_replication_stmt.go
+++ b/pkg/ccl/crosscluster/logical/create_logical_replication_stmt.go
@@ -208,6 +208,12 @@ func createLogicalReplicationStreamPlanHook(
return err
}
+ sourceTypes := make([]*descpb.TypeDescriptor, len(spec.TypeDescriptors))
+ for i, desc := range spec.TypeDescriptors {
+ sourceTypes[i] = &desc
+ }
+ crossClusterResolver := crosscluster.MakeCrossClusterTypeResolver(sourceTypes)
+
// If the user asked to ignore "ttl-deletes", make sure that at least one of
// the source tables actually has a TTL job which sets the omit bit that
// is used for filtering; if not, they probably forgot that step.
@@ -215,7 +221,11 @@ func createLogicalReplicationStreamPlanHook(
for i, name := range srcTableNames {
td := spec.TableDescriptors[name]
- srcTableDescs[i] = &td
+ cpy := tabledesc.NewBuilder(&td).BuildCreatedMutableTable()
+ if err := typedesc.HydrateTypesInDescriptor(ctx, cpy, crossClusterResolver); err != nil {
+ return err
+ }
+ srcTableDescs[i] = cpy.TableDesc()
repPairs[i].SrcDescriptorID = int32(td.ID)
if td.RowLevelTTL != nil && td.RowLevelTTL.DisableChangefeedReplication {
throwNoTTLWithCDCIgnoreError = false
diff --git a/pkg/ccl/crosscluster/logical/logical_replication_job.go b/pkg/ccl/crosscluster/logical/logical_replication_job.go
index 9f29c615ff1a..08ccfede3ef8 100644
--- a/pkg/ccl/crosscluster/logical/logical_replication_job.go
+++ b/pkg/ccl/crosscluster/logical/logical_replication_job.go
@@ -29,7 +29,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/catalog/tabledesc"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/typedesc"
"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
- "github.com/cockroachdb/cockroach/pkg/sql/importer"
"github.com/cockroachdb/cockroach/pkg/sql/isql"
"github.com/cockroachdb/cockroach/pkg/sql/physicalplan"
"github.com/cockroachdb/cockroach/pkg/sql/sem/catid"
@@ -359,14 +358,13 @@ func (p *logicalReplicationPlanner) generatePlanImpl(
defaultFnOID = catid.FuncIDToOID(catid.DescID(defaultFnID))
}
- // TODO(msbutler): is this import type resolver kosher? Should put in a new package.
- importResolver := importer.MakeImportTypeResolver(plan.SourceTypes)
+ crossClusterResolver := crosscluster.MakeCrossClusterTypeResolver(plan.SourceTypes)
tableMetadataByDestID := make(map[int32]execinfrapb.TableReplicationMetadata)
if err := sql.DescsTxn(ctx, execCfg, func(ctx context.Context, txn isql.Txn, descriptors *descs.Collection) error {
for _, pair := range payload.ReplicationPairs {
srcTableDesc := plan.DescriptorMap[pair.SrcDescriptorID]
cpy := tabledesc.NewBuilder(&srcTableDesc).BuildCreatedMutableTable()
- if err := typedesc.HydrateTypesInDescriptor(ctx, cpy, importResolver); err != nil {
+ if err := typedesc.HydrateTypesInDescriptor(ctx, cpy, crossClusterResolver); err != nil {
return err
}
srcTableDesc = *cpy.TableDesc()
diff --git a/pkg/ccl/crosscluster/logical/logical_replication_job_test.go b/pkg/ccl/crosscluster/logical/logical_replication_job_test.go
index ca52a991af07..2f32e2876764 100644
--- a/pkg/ccl/crosscluster/logical/logical_replication_job_test.go
+++ b/pkg/ccl/crosscluster/logical/logical_replication_job_test.go
@@ -1953,20 +1953,35 @@ func TestUserDefinedTypes(t *testing.T) {
// Create the same user-defined type in both tables.
dbA.Exec(t, "CREATE TYPE my_enum AS ENUM ('one', 'two', 'three')")
dbB.Exec(t, "CREATE TYPE my_enum AS ENUM ('one', 'two', 'three')")
+ dbA.Exec(t, "CREATE TYPE my_composite AS (a INT, b TEXT)")
+ dbB.Exec(t, "CREATE TYPE my_composite AS (a INT, b TEXT)")
- dbA.Exec(t, "CREATE TABLE data (pk INT PRIMARY KEY, val my_enum DEFAULT 'two')")
- dbB.Exec(t, "CREATE TABLE data (pk INT PRIMARY KEY, val my_enum DEFAULT 'two')")
-
- dbB.Exec(t, "INSERT INTO data VALUES (1, 'one')")
- // Force default expression evaluation.
- dbB.Exec(t, "INSERT INTO data VALUES (2)")
-
- var jobAID jobspb.JobID
- dbA.QueryRow(t, "CREATE LOGICAL REPLICATION STREAM FROM TABLE data ON $1 INTO TABLE data with skip schema check", dbBURL.String()).Scan(&jobAID)
- WaitUntilReplicatedTime(t, s.Clock().Now(), dbA, jobAID)
- require.NoError(t, replicationtestutils.CheckEmptyDLQs(ctx, dbA.DB, "A"))
- dbB.CheckQueryResults(t, "SELECT * FROM data", [][]string{{"1", "one"}, {"2", "two"}})
- dbA.CheckQueryResults(t, "SELECT * FROM data", [][]string{{"1", "one"}, {"2", "two"}})
+ for _, mode := range []string{"validated", "immediate"} {
+ t.Run(mode, func(t *testing.T) {
+ dbA.Exec(t, "CREATE TABLE data (pk INT PRIMARY KEY, val1 my_enum DEFAULT 'two', val2 my_composite)")
+ dbB.Exec(t, "CREATE TABLE data (pk INT PRIMARY KEY, val1 my_enum DEFAULT 'two', val2 my_composite)")
+
+ dbB.Exec(t, "INSERT INTO data VALUES (1, 'one', (3, 'cat'))")
+ // Force default expression evaluation.
+ dbB.Exec(t, "INSERT INTO data (pk, val2) VALUES (2, (4, 'dog'))")
+
+ var jobAID jobspb.JobID
+ dbA.QueryRow(t,
+ fmt.Sprintf("CREATE LOGICAL REPLICATION STREAM FROM TABLE data ON $1 INTO TABLE data WITH mode = %s", mode),
+ dbBURL.String(),
+ ).Scan(&jobAID)
+ WaitUntilReplicatedTime(t, s.Clock().Now(), dbA, jobAID)
+ require.NoError(t, replicationtestutils.CheckEmptyDLQs(ctx, dbA.DB, "A"))
+ dbB.CheckQueryResults(t, "SELECT * FROM data", [][]string{{"1", "one", "(3,cat)"}, {"2", "two", "(4,dog)"}})
+ dbA.CheckQueryResults(t, "SELECT * FROM data", [][]string{{"1", "one", "(3,cat)"}, {"2", "two", "(4,dog)"}})
+
+ dbA.Exec(t, "CANCEL JOB $1", jobAID)
+ jobutils.WaitForJobToCancel(t, dbA, jobAID)
+
+ dbA.Exec(t, "DROP TABLE data")
+ dbB.Exec(t, "DROP TABLE data")
+ })
+ }
}
// TestLogicalReplicationCreationChecks verifies that we check that the table
@@ -2075,7 +2090,7 @@ func TestLogicalReplicationCreationChecks(t *testing.T) {
`cannot create logical replication stream: destination table tab CHECK constraints do not match source table tab`,
"CREATE LOGICAL REPLICATION STREAM FROM TABLE tab ON $1 INTO TABLE tab", dbBURL.String(),
)
- // Allos user to create LDR stream with mismatched CHECK via SKIP SCHEMA CHECK.
+ // Allow user to create LDR stream with mismatched CHECK via SKIP SCHEMA CHECK.
var jobIDSkipSchemaCheck jobspb.JobID
dbA.QueryRow(t,
"CREATE LOGICAL REPLICATION STREAM FROM TABLE tab ON $1 INTO TABLE tab WITH SKIP SCHEMA CHECK",
@@ -2097,13 +2112,45 @@ func TestLogicalReplicationCreationChecks(t *testing.T) {
dbA.Exec(t, "CANCEL JOB $1", jobAID)
jobutils.WaitForJobToCancel(t, dbA, jobAID)
- // Verify that the stream cannot be created with user defined types.
+ // Check if the table references a UDF.
+ dbA.Exec(t, "CREATE OR REPLACE FUNCTION my_udf() RETURNS INT AS $$ SELECT 1 $$ LANGUAGE SQL")
+ dbA.Exec(t, "ALTER TABLE tab ADD COLUMN udf_col INT NOT NULL")
+ dbA.Exec(t, "ALTER TABLE tab ALTER COLUMN udf_col SET DEFAULT my_udf()")
+ dbB.Exec(t, "ALTER TABLE tab ADD COLUMN udf_col INT NOT NULL DEFAULT 1")
+ dbA.ExpectErr(t,
+ `cannot create logical replication stream: table tab references functions with IDs \[[0-9]+\]`,
+ "CREATE LOGICAL REPLICATION STREAM FROM TABLE tab ON $1 INTO TABLE tab", dbBURL.String(),
+ )
+
+ // Check if the table references a sequence.
+ dbA.Exec(t, "ALTER TABLE tab DROP COLUMN udf_col")
+ dbB.Exec(t, "ALTER TABLE tab DROP COLUMN udf_col")
+ dbA.Exec(t, "CREATE SEQUENCE my_seq")
+ dbA.Exec(t, "ALTER TABLE tab ADD COLUMN seq_col INT NOT NULL DEFAULT nextval('my_seq')")
+ dbB.Exec(t, "ALTER TABLE tab ADD COLUMN seq_col INT NOT NULL DEFAULT 1")
+ dbA.ExpectErr(t,
+ `cannot create logical replication stream: table tab references sequences with IDs \[[0-9]+\]`,
+ "CREATE LOGICAL REPLICATION STREAM FROM TABLE tab ON $1 INTO TABLE tab", dbBURL.String(),
+ )
+
+ // Check if table has a trigger.
+ dbA.Exec(t, "ALTER TABLE tab DROP COLUMN seq_col")
+ dbB.Exec(t, "ALTER TABLE tab DROP COLUMN seq_col")
+ dbA.Exec(t, "CREATE OR REPLACE FUNCTION my_trigger() RETURNS TRIGGER AS $$ BEGIN RETURN NEW; END $$ LANGUAGE PLPGSQL")
+ dbA.Exec(t, "CREATE TRIGGER my_trigger BEFORE INSERT ON tab FOR EACH ROW EXECUTE FUNCTION my_trigger()")
+ dbA.ExpectErr(t,
+ `cannot create logical replication stream: table tab references triggers \[my_trigger\]`,
+ "CREATE LOGICAL REPLICATION STREAM FROM TABLE tab ON $1 INTO TABLE tab", dbBURL.String(),
+ )
+
+ // Verify that the stream cannot be created with mismatched enum types.
+ dbA.Exec(t, "DROP TRIGGER my_trigger ON tab")
dbA.Exec(t, "CREATE TYPE mytype AS ENUM ('a', 'b', 'c')")
- dbB.Exec(t, "CREATE TYPE b.mytype AS ENUM ('a', 'b', 'c')")
+ dbB.Exec(t, "CREATE TYPE b.mytype AS ENUM ('a', 'b')")
dbA.Exec(t, "ALTER TABLE tab ADD COLUMN enum_col mytype NOT NULL")
dbB.Exec(t, "ALTER TABLE b.tab ADD COLUMN enum_col b.mytype NOT NULL")
dbA.ExpectErr(t,
- `cannot create logical replication stream: destination table tab column enum_col has user-defined type USER DEFINED ENUM: public.mytype`,
+ `cannot create logical replication stream: .* destination type USER DEFINED ENUM: public.mytype has logical representations \[a b c\], but the source type USER DEFINED ENUM: mytype has \[a b\]`,
"CREATE LOGICAL REPLICATION STREAM FROM TABLE tab ON $1 INTO TABLE tab", dbBURL.String(),
)
// Allows user to create LDR stream with UDT via SKIP SCHEMA CHECK.
@@ -2114,9 +2161,21 @@ func TestLogicalReplicationCreationChecks(t *testing.T) {
dbA.Exec(t, "CANCEL JOB $1", jobIDSkipSchemaCheck)
jobutils.WaitForJobToCancel(t, dbA, jobIDSkipSchemaCheck)
- // Check that UNIQUE indexes match.
+ // Verify that the stream cannot be created with mismatched composite types.
dbA.Exec(t, "ALTER TABLE tab DROP COLUMN enum_col")
dbB.Exec(t, "ALTER TABLE b.tab DROP COLUMN enum_col")
+ dbA.Exec(t, "CREATE TYPE composite_typ AS (a INT, b TEXT)")
+ dbB.Exec(t, "CREATE TYPE b.composite_typ AS (a TEXT, b INT)")
+ dbA.Exec(t, "ALTER TABLE tab ADD COLUMN composite_udt_col composite_typ NOT NULL")
+ dbB.Exec(t, "ALTER TABLE b.tab ADD COLUMN composite_udt_col b.composite_typ NOT NULL")
+ dbA.ExpectErr(t,
+ `cannot create logical replication stream: .* destination type USER DEFINED RECORD: public.composite_typ tuple element 0 does not match source type USER DEFINED RECORD: composite_typ tuple element 0: destination type INT8 does not match source type STRING`,
+ "CREATE LOGICAL REPLICATION STREAM FROM TABLE tab ON $1 INTO TABLE tab", dbBURL.String(),
+ )
+
+ // Check that UNIQUE indexes match.
+ dbA.Exec(t, "ALTER TABLE tab DROP COLUMN composite_udt_col")
+ dbB.Exec(t, "ALTER TABLE b.tab DROP COLUMN composite_udt_col")
dbA.Exec(t, "CREATE UNIQUE INDEX payload_idx ON tab(payload)")
dbB.Exec(t, "CREATE UNIQUE INDEX multi_idx ON b.tab(composite_col, pk)")
dbA.ExpectErr(t,
@@ -2134,6 +2193,50 @@ func TestLogicalReplicationCreationChecks(t *testing.T) {
dbBURL.String(),
).Scan(&jobAID)
+ // Verify that unsupported CREATE INDEX statements are blocked.
+ dbA.ExpectErr(t,
+ "this schema change is disallowed on table tab because it is referenced by one or more logical replication jobs",
+ "CREATE INDEX virtual_col_idx ON tab(virtual_col)",
+ )
+ dbA.ExpectErr(t,
+ "this schema change is disallowed on table tab because it is referenced by one or more logical replication jobs",
+ "CREATE INDEX hash_idx ON tab(pk) USING HASH WITH (bucket_count = 4)",
+ )
+ dbA.ExpectErr(t,
+ "this schema change is disallowed on table tab because it is referenced by one or more logical replication jobs",
+ "CREATE INDEX partial_idx ON tab(composite_col) WHERE pk > 0",
+ )
+ dbA.ExpectErr(t,
+ "this schema change is disallowed on table tab because it is referenced by one or more logical replication jobs",
+ "CREATE UNIQUE INDEX unique_idx ON tab(composite_col)",
+ )
+
+ // Creating triggers is also blocked.
+ dbA.ExpectErr(t,
+ "this schema change is disallowed on table tab because it is referenced by one or more logical replication jobs",
+ "CREATE TRIGGER my_trigger BEFORE INSERT ON tab FOR EACH ROW EXECUTE FUNCTION my_trigger()",
+ )
+
+ // Creating a "normal" secondary index (and dropping it) is allowed.
+ dbA.Exec(t, "CREATE INDEX normal_idx ON tab(composite_col)")
+ dbA.Exec(t, "DROP INDEX normal_idx")
+
+ // Changing safe table storage parameters is allowed.
+ dbA.ExpectErr(t,
+ "this schema change is disallowed on table tab because it is referenced by one or more logical replication jobs",
+ "ALTER TABLE tab SET (ttl = 'on', ttl_expire_after = '5m')",
+ )
+ dbA.Exec(t, "ALTER TABLE tab SET (ttl = 'on', ttl_expiration_expression = $$ '2024-01-01 12:00:00'::TIMESTAMPTZ $$)")
+ dbA.ExpectErr(t,
+ "this schema change is disallowed on table tab because it is referenced by one or more logical replication jobs",
+ "ALTER TABLE tab RESET (ttl)",
+ )
+ // Storage param updates are only allowed if it is the only change.
+ dbA.ExpectErr(t,
+ "this schema change is disallowed on table tab because it is referenced by one or more logical replication jobs",
+ "ALTER TABLE tab ADD COLUMN c INT, SET (fillfactor = 70)",
+ )
+
// Kill replication job.
dbA.Exec(t, "CANCEL JOB $1", jobAID)
jobutils.WaitForJobToCancel(t, dbA, jobAID)
diff --git a/pkg/ccl/crosscluster/logical/lww_row_processor.go b/pkg/ccl/crosscluster/logical/lww_row_processor.go
index 645ec83d83ec..63122a4b469a 100644
--- a/pkg/ccl/crosscluster/logical/lww_row_processor.go
+++ b/pkg/ccl/crosscluster/logical/lww_row_processor.go
@@ -32,6 +32,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
"github.com/cockroachdb/cockroach/pkg/sql/sessiondatapb"
+ "github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/metamorphic"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
@@ -90,6 +91,12 @@ func (q *queryBuilder) AddRow(row cdcevent.Row) error {
return err
}
if err := it.Datum(func(d tree.Datum, col cdcevent.ResultColumn) error {
+ if dEnum, ok := d.(*tree.DEnum); ok {
+ // Override the type to Unknown to avoid a mismatched type OID error
+ // during execution. Note that Unknown is the type used by default
+ // when a SQL statement is executed without type hints.
+ dEnum.EnumTyp = types.Unknown
+ }
q.scratchDatums = append(q.scratchDatums, d)
return nil
}); err != nil {
@@ -116,6 +123,12 @@ func (q *queryBuilder) AddRowDefaultNull(row *cdcevent.Row) error {
continue
}
if err := it.Datum(func(d tree.Datum, col cdcevent.ResultColumn) error {
+ if dEnum, ok := d.(*tree.DEnum); ok {
+ // Override the type to Unknown to avoid a mismatched type OID error
+ // during execution. Note that Unknown is the type used by default
+ // when a SQL statement is executed without type hints.
+ dEnum.EnumTyp = types.Unknown
+ }
q.scratchDatums = append(q.scratchDatums, d)
return nil
}); err != nil {
diff --git a/pkg/ccl/crosscluster/streamclient/partitioned_stream_client.go b/pkg/ccl/crosscluster/streamclient/partitioned_stream_client.go
index 529d8408527d..d4ba69a4d9d2 100644
--- a/pkg/ccl/crosscluster/streamclient/partitioned_stream_client.go
+++ b/pkg/ccl/crosscluster/streamclient/partitioned_stream_client.go
@@ -343,8 +343,8 @@ func (p *partitionedStreamClient) PlanLogicalReplication(
}
sourceTypes := make([]*descpb.TypeDescriptor, len(streamSpec.TypeDescriptors))
- for _, desc := range streamSpec.TypeDescriptors {
- sourceTypes = append(sourceTypes, &desc)
+ for i, desc := range streamSpec.TypeDescriptors {
+ sourceTypes[i] = &desc
}
return LogicalReplicationPlan{
diff --git a/pkg/ccl/logictestccl/testdata/logic_test/generic b/pkg/ccl/logictestccl/testdata/logic_test/generic
index 175591a55e64..bc9493959480 100644
--- a/pkg/ccl/logictestccl/testdata/logic_test/generic
+++ b/pkg/ccl/logictestccl/testdata/logic_test/generic
@@ -1193,30 +1193,3 @@ quality of service: regular
regions:
actual row count: 1
size: 1 column, 1 row
-
-statement ok
-DEALLOCATE p
-
-# Regression test for #132963. Do not cache non-reusable plans.
-statement ok
-SET plan_cache_mode = auto
-
-statement ok
-CREATE TABLE a (a INT PRIMARY KEY)
-
-statement ok
-PREPARE p AS SELECT create_statement FROM [SHOW CREATE TABLE a]
-
-query T
-EXECUTE p
-----
-CREATE TABLE public.a (
- a INT8 NOT NULL,
- CONSTRAINT a_pkey PRIMARY KEY (a ASC)
-)
-
-statement ok
-ALTER TABLE a RENAME TO b
-
-statement error pgcode 42P01 pq: relation \"a\" does not exist
-EXECUTE p
diff --git a/pkg/cli/testdata/declarative-rules/deprules b/pkg/cli/testdata/declarative-rules/deprules
index 844da777f531..9cf90b386e04 100644
--- a/pkg/cli/testdata/declarative-rules/deprules
+++ b/pkg/cli/testdata/declarative-rules/deprules
@@ -1,6 +1,6 @@
dep
----
-debug declarative-print-rules 1000024.2 dep
+debug declarative-print-rules 24.2 dep
deprules
----
- name: 'CheckConstraint transitions to ABSENT uphold 2-version invariant: PUBLIC->VALIDATED'
diff --git a/pkg/cli/testdata/declarative-rules/invalid_version b/pkg/cli/testdata/declarative-rules/invalid_version
index 84e28c5287b1..231ea4f78b35 100644
--- a/pkg/cli/testdata/declarative-rules/invalid_version
+++ b/pkg/cli/testdata/declarative-rules/invalid_version
@@ -4,5 +4,5 @@ invalid_version
debug declarative-print-rules 1.1 op
unsupported version number, the supported versions are:
latest
- 1000024.2
- 1000024.1
+ 24.2
+ 24.1
diff --git a/pkg/clusterversion/cockroach_versions.go b/pkg/clusterversion/cockroach_versions.go
index a006e6e01781..cf4e79805de8 100644
--- a/pkg/clusterversion/cockroach_versions.go
+++ b/pkg/clusterversion/cockroach_versions.go
@@ -352,7 +352,7 @@ const V24_3 = Latest
// binary in a dev cluster.
//
// See devOffsetKeyStart for more details.
-const DevelopmentBranch = true
+const DevelopmentBranch = false
// finalVersion should be set on a release branch to the minted final cluster
// version key, e.g. to V23_2 on the release-23.2 branch once it is minted.
diff --git a/pkg/cmd/drtprod/configs/drt_chaos.yaml b/pkg/cmd/drtprod/configs/drt_chaos.yaml
index f55e5789627a..a3e62fbb3b90 100644
--- a/pkg/cmd/drtprod/configs/drt_chaos.yaml
+++ b/pkg/cmd/drtprod/configs/drt_chaos.yaml
@@ -88,9 +88,5 @@ targets:
- workload
- script: "pkg/cmd/drtprod/scripts/setup_datadog_workload"
- script: "pkg/cmd/drtprod/scripts/tpcc_init.sh"
- args:
- - cct_tpcc # suffix added to script name tpcc_init_cct_tpcc.sh
- - true # determines whether to execute the script immediately on workload node
flags:
warehouses: 12000
- db: cct_tpcc
diff --git a/pkg/cmd/drtprod/configs/drt_large.yaml b/pkg/cmd/drtprod/configs/drt_large.yaml
index 4bc9ec6d7a6d..a6080097bc7e 100644
--- a/pkg/cmd/drtprod/configs/drt_large.yaml
+++ b/pkg/cmd/drtprod/configs/drt_large.yaml
@@ -101,9 +101,5 @@ targets:
- workload
- script: "pkg/cmd/drtprod/scripts/setup_datadog_workload"
- script: "pkg/cmd/drtprod/scripts/tpcc_init.sh"
- args:
- - cct_tpcc # suffix added to script name tpcc_init_cct_tpcc.sh
- - true # determines whether to execute the script immediately on workload node
flags:
warehouses: 15000
- db: cct_tpcc
diff --git a/pkg/cmd/drtprod/configs/drt_scale.yaml b/pkg/cmd/drtprod/configs/drt_scale.yaml
index d8a24e137888..a2977aa26196 100644
--- a/pkg/cmd/drtprod/configs/drt_scale.yaml
+++ b/pkg/cmd/drtprod/configs/drt_scale.yaml
@@ -123,16 +123,6 @@ targets:
- pkg/cmd/drt/scripts/roachtest_operations_run.sh
- roachtest_operations_run.sh
- script: "pkg/cmd/drtprod/scripts/tpcc_init.sh"
- args:
- - cct_tpcc_320k # suffix added to script name tpcc_init_cct_tpcc_320k.sh
- - true # determines whether to execute the script immediately on workload node
flags:
- warehouses: 320000
+ warehouses: 100000
db: cct_tpcc
- - script: "pkg/cmd/drtprod/scripts/tpcc_init.sh"
- args:
- - cct_tpcc_640k # suffix added to script name tpcc_init_cct_tpcc_640k.sh
- - false # determines whether to execute the script immediately on workload node
- flags:
- warehouses: 640000
- db: cct_tpcc_big
diff --git a/pkg/cmd/drtprod/configs/drt_scale_operations.yaml b/pkg/cmd/drtprod/configs/drt_scale_operations.yaml
index 412995b46acc..becfa51be963 100644
--- a/pkg/cmd/drtprod/configs/drt_scale_operations.yaml
+++ b/pkg/cmd/drtprod/configs/drt_scale_operations.yaml
@@ -13,5 +13,5 @@ targets:
steps:
- script: "pkg/cmd/drtprod/scripts/create_run_operation.sh"
args:
- - "schema_change,add-column|add-index"
+ - "schema_change,add-column|add-index,0 0 * * *" # runs every day at 12 AM
- "kill_stall,disk-stall|network-partition|node-kill,0 * * * *" # runs every 1 hour
diff --git a/pkg/cmd/drtprod/scripts/tpcc_init.sh b/pkg/cmd/drtprod/scripts/tpcc_init.sh
index 851f30e4106d..a9122a6c7cf6 100755
--- a/pkg/cmd/drtprod/scripts/tpcc_init.sh
+++ b/pkg/cmd/drtprod/scripts/tpcc_init.sh
@@ -9,23 +9,6 @@
# The --warehouses and other flags for import are passed as argument to this script
# NOTE - This uses CLUSTER and WORKLOAD_CLUSTER environment variable, if not set the script fails
-# The first argument is the name suffix that is added to the script as tpcc_init_<suffix>.sh
-if [ "$#" -lt 4 ]; then
- echo "Usage: $0 "
- exit 1
-fi
-suffix=$1
-shift
-# The second argument represents whether the init process should be started in the workload cluster
-# The value is true or false
-if [ "$1" != "true" ] && [ "$1" != "false" ]; then
- # $1 is used again because of the shift
- echo "Error: The second argument must be 'true' or 'false' which implies whether the script should be started in background or not."
- exit 1
-fi
-execute_script=$1
-shift
-
if [ -z "${CLUSTER}" ]; then
echo "environment CLUSTER is not set"
exit 1
@@ -36,22 +19,19 @@ if [ -z "${WORKLOAD_CLUSTER}" ]; then
exit 1
fi
-absolute_path=$(roachprod run "${WORKLOAD_CLUSTER}":1 -- "realpath ./tpcc_init_${suffix}.sh")
+absolute_path=$(roachprod run "${WORKLOAD_CLUSTER}":1 -- "realpath ./tpcc_init.sh")
pwd=$(roachprod run "${WORKLOAD_CLUSTER}":1 -- "dirname ${absolute_path}")
-PGURLS=$(roachprod pgurl "${CLUSTER}")
# script is responsible for importing the tpcc database for workload
-roachprod ssh "${WORKLOAD_CLUSTER}":1 -- "tee tpcc_init_${suffix}.sh > /dev/null << 'EOF'
+roachprod ssh "${WORKLOAD_CLUSTER}":1 -- "tee tpcc_init.sh > /dev/null << 'EOF'
#!/bin/bash
export ROACHPROD_GCE_DEFAULT_PROJECT=${ROACHPROD_GCE_DEFAULT_PROJECT}
export ROACHPROD_DNS=${ROACHPROD_DNS}
${pwd}/roachprod sync
sleep 20
-${pwd}/cockroach workload init tpcc $@ --secure --families $PGURLS
+PGURLS=\$(${pwd}/roachprod pgurl ${CLUSTER} | sed s/\'//g)
+${pwd}/cockroach workload init tpcc $@ --secure --families \$PGURLS
EOF"
-roachprod ssh "${WORKLOAD_CLUSTER}":1 -- "chmod +x tpcc_init_${suffix}.sh"
-
-if [ "$execute_script" = "true" ]; then
- roachprod run "${WORKLOAD_CLUSTER}":1 -- "sudo systemd-run --unit tpccinit_${suffix} --same-dir --uid \$(id -u) --gid \$(id -g) bash ${pwd}/tpcc_init_${suffix}.sh"
-fi
+roachprod ssh "${WORKLOAD_CLUSTER}":1 -- "chmod +x tpcc_init.sh"
+roachprod run "${WORKLOAD_CLUSTER}":1 -- "sudo systemd-run --unit tpccinit --same-dir --uid \$(id -u) --gid \$(id -g) bash ${pwd}/tpcc_init.sh"
diff --git a/pkg/cmd/roachtest/operations/add_column.go b/pkg/cmd/roachtest/operations/add_column.go
index 68d64217bbc5..10027e2d1edf 100644
--- a/pkg/cmd/roachtest/operations/add_column.go
+++ b/pkg/cmd/roachtest/operations/add_column.go
@@ -20,7 +20,6 @@ import (
type cleanupAddedColumn struct {
db, table, column string
- locked bool
}
func (cl *cleanupAddedColumn) Cleanup(
@@ -29,10 +28,6 @@ func (cl *cleanupAddedColumn) Cleanup(
conn := c.Conn(ctx, o.L(), 1, option.VirtualClusterName(roachtestflags.VirtualCluster))
defer conn.Close()
- if cl.locked {
- setSchemaLocked(ctx, o, conn, cl.db, cl.table, false /* lock */)
- defer setSchemaLocked(ctx, o, conn, cl.db, cl.table, true /* lock */)
- }
o.Status(fmt.Sprintf("dropping column %s", cl.column))
_, err := conn.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.%s DROP COLUMN %s CASCADE", cl.db, cl.table, cl.column))
if err != nil {
@@ -63,17 +58,6 @@ func runAddColumn(
colQualification += " NOT NULL"
}
- // If the table's schema is locked, then unlock the table and make sure it will
- // be re-locked during cleanup.
- // TODO(#129694): Remove schema unlocking/re-locking once automation is internalized.
- locked := isSchemaLocked(o, conn, dbName, tableName)
- if locked {
- setSchemaLocked(ctx, o, conn, dbName, tableName, false /* lock */)
- // Re-lock the table if necessary, so that it stays locked during any wait
- // period before cleanup.
- defer setSchemaLocked(ctx, o, conn, dbName, tableName, true /* lock */)
- }
-
o.Status(fmt.Sprintf("adding column %s to table %s.%s", colName, dbName, tableName))
addColStmt := fmt.Sprintf("ALTER TABLE %s.%s ADD COLUMN %s VARCHAR %s", dbName, tableName, colName, colQualification)
_, err := conn.ExecContext(ctx, addColStmt)
@@ -82,12 +66,10 @@ func runAddColumn(
}
o.Status(fmt.Sprintf("column %s created", colName))
-
return &cleanupAddedColumn{
db: dbName,
table: tableName,
column: colName,
- locked: locked,
}
}
diff --git a/pkg/cmd/roachtest/operations/add_index.go b/pkg/cmd/roachtest/operations/add_index.go
index e67b8f53772c..72f7957954eb 100644
--- a/pkg/cmd/roachtest/operations/add_index.go
+++ b/pkg/cmd/roachtest/operations/add_index.go
@@ -20,7 +20,6 @@ import (
type cleanupAddedIndex struct {
db, table, index string
- locked bool
}
func (cl *cleanupAddedIndex) Cleanup(
@@ -29,10 +28,6 @@ func (cl *cleanupAddedIndex) Cleanup(
conn := c.Conn(ctx, o.L(), 1, option.VirtualClusterName(roachtestflags.VirtualCluster))
defer conn.Close()
- if cl.locked {
- setSchemaLocked(ctx, o, conn, cl.db, cl.table, false /* lock */)
- defer setSchemaLocked(ctx, o, conn, cl.db, cl.table, true /* lock */)
- }
o.Status(fmt.Sprintf("dropping index %s", cl.index))
_, err := conn.ExecContext(ctx, fmt.Sprintf("DROP INDEX %s.%s@%s", cl.db, cl.table, cl.index))
if err != nil {
@@ -63,15 +58,6 @@ func runAddIndex(
o.Fatal(err)
}
- // If the table's schema is locked, then unlock the table and make sure it will
- // be re-locked during cleanup.
- // TODO(#129694): Remove schema unlocking/re-locking once automation is internalized.
- locked := isSchemaLocked(o, conn, dbName, tableName)
- if locked {
- setSchemaLocked(ctx, o, conn, dbName, tableName, false /* lock */)
- defer setSchemaLocked(ctx, o, conn, dbName, tableName, true /* lock */)
- }
-
indexName := fmt.Sprintf("add_index_op_%d", rng.Uint32())
o.Status(fmt.Sprintf("adding index to column %s in table %s.%s", colName, dbName, tableName))
createIndexStmt := fmt.Sprintf("CREATE INDEX %s ON %s.%s (%s)", indexName, dbName, tableName, colName)
@@ -81,12 +67,10 @@ func runAddIndex(
}
o.Status(fmt.Sprintf("index %s created", indexName))
-
return &cleanupAddedIndex{
- db: dbName,
- table: tableName,
- index: indexName,
- locked: locked,
+ db: dbName,
+ table: tableName,
+ index: indexName,
}
}
diff --git a/pkg/cmd/roachtest/operations/utils.go b/pkg/cmd/roachtest/operations/utils.go
index 5a507f4ab1c9..6b0f99a17ef9 100644
--- a/pkg/cmd/roachtest/operations/utils.go
+++ b/pkg/cmd/roachtest/operations/utils.go
@@ -9,7 +9,6 @@ import (
"context"
gosql "database/sql"
"fmt"
- "strings"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/operation"
@@ -156,26 +155,3 @@ func pickRandomStore(ctx context.Context, o operation.Operation, conn *gosql.DB,
}
return stores[rng.Intn(len(stores))]
}
-
-// Returns true if the schema_locked parameter is set on this table.
-func isSchemaLocked(o operation.Operation, conn *gosql.DB, db, tbl string) bool {
- showTblStmt := fmt.Sprintf("SHOW CREATE %s.%s", db, tbl)
- var tblName, createStmt string
- err := conn.QueryRow(showTblStmt).Scan(&tblName, &createStmt)
- if err != nil {
- o.Fatal(err)
- }
- return strings.Contains(createStmt, "schema_locked = true")
-}
-
-// Set the schema_locked storage parameter.
-func setSchemaLocked(
- ctx context.Context, o operation.Operation, conn *gosql.DB, db, tbl string, lock bool,
-) {
- stmt := fmt.Sprintf("ALTER TABLE %s.%s SET (schema_locked=%v)", db, tbl, lock)
- o.Status(fmt.Sprintf("setting schema_locked = %v on table %s.%s", lock, db, tbl))
- _, err := conn.ExecContext(ctx, stmt)
- if err != nil {
- o.Fatal(err)
- }
-}
diff --git a/pkg/cmd/roachtest/tests/activerecord.go b/pkg/cmd/roachtest/tests/activerecord.go
index 04e88e7b323a..a527a7f2cd7b 100644
--- a/pkg/cmd/roachtest/tests/activerecord.go
+++ b/pkg/cmd/roachtest/tests/activerecord.go
@@ -146,7 +146,7 @@ func registerActiveRecord(r registry.Registry) {
c,
node,
"installing bundler",
- `cd /mnt/data1/activerecord-cockroachdb-adapter/ && sudo gem install bundler:2.1.4`,
+ `cd /mnt/data1/activerecord-cockroachdb-adapter/ && sudo gem install bundler:2.4.9`,
); err != nil {
t.Fatal(err)
}
diff --git a/pkg/cmd/roachtest/tests/admission_control_latency.go b/pkg/cmd/roachtest/tests/admission_control_latency.go
index 036ce4dcf7e5..7793bc158aab 100644
--- a/pkg/cmd/roachtest/tests/admission_control_latency.go
+++ b/pkg/cmd/roachtest/tests/admission_control_latency.go
@@ -750,11 +750,6 @@ func (v variations) runTest(ctx context.Context, t test.Test, c cluster.Cluster)
`SET CLUSTER SETTING kv.lease.reject_on_leader_unknown.enabled = true`); err != nil {
t.Fatal(err)
}
- // Enable raft tracing. Remove this once raft tracing is the default.
- if _, err := db.ExecContext(ctx,
- `SET CLUSTER SETTING kv.raft.max_concurrent_traces = '10'`); err != nil {
- t.Fatal(err)
- }
// This isn't strictly necessary, but it would be nice if this test passed at 10s (or lower).
if _, err := db.ExecContext(ctx,
`SET CLUSTER SETTING server.time_after_store_suspect = '10s'`); err != nil {
diff --git a/pkg/cmd/roachtest/tests/follower_reads.go b/pkg/cmd/roachtest/tests/follower_reads.go
index 3a2febcff258..b31c0a2c32a8 100644
--- a/pkg/cmd/roachtest/tests/follower_reads.go
+++ b/pkg/cmd/roachtest/tests/follower_reads.go
@@ -1008,6 +1008,13 @@ func runFollowerReadsMixedVersionGlobalTableTest(
// Use a longer upgrade timeout to give the migrations enough time to finish
// considering the cross-region latency.
mixedversion.UpgradeTimeout(60*time.Minute),
+
+			// This test was flaky when upgrading from v23.1 to v23.2 for follower
+			// reads in shared-process deployments. A number of changes to tenant
+			// health checks since then appear to have addressed the issue, so we
+			// require v23.2.0 as the minimum supported version.
+ mixedversion.MinimumSupportedVersion("v23.2.0"),
+
// This test does not currently work with shared-process
// deployments (#129167), so we do not run it in separate-process
// mode either to reduce noise. We should reevaluate once the test
diff --git a/pkg/cmd/roachtest/tests/pgjdbc_blocklist.go b/pkg/cmd/roachtest/tests/pgjdbc_blocklist.go
index 04ed3d149b3c..2bb0583f28a3 100644
--- a/pkg/cmd/roachtest/tests/pgjdbc_blocklist.go
+++ b/pkg/cmd/roachtest/tests/pgjdbc_blocklist.go
@@ -399,7 +399,6 @@ var pgjdbcBlockList = blocklist{
`org.postgresql.test.jdbc2.ServerErrorTest.testNotNullConstraint`: "27796",
`org.postgresql.test.jdbc2.ServerErrorTest.testPrimaryKey`: "27796",
`org.postgresql.test.jdbc2.StatementTest.closeInProgressStatement()`: "unknown",
- `org.postgresql.test.jdbc2.StatementTest.concurrentWarningReadAndClear()`: "unknown",
`org.postgresql.test.jdbc2.StatementTest.fastCloses()`: "unknown",
`org.postgresql.test.jdbc2.StatementTest.parsingSemiColons()`: "unknown",
`org.postgresql.test.jdbc2.StatementTest.updateCount()`: "unknown",
diff --git a/pkg/cmd/roachtest/tests/ruby_pg.go b/pkg/cmd/roachtest/tests/ruby_pg.go
index 663d75c0fae9..ff521005612e 100644
--- a/pkg/cmd/roachtest/tests/ruby_pg.go
+++ b/pkg/cmd/roachtest/tests/ruby_pg.go
@@ -146,7 +146,7 @@ func registerRubyPG(r registry.Registry) {
c,
node,
"installing bundler",
- `cd /mnt/data1/ruby-pg/ && sudo gem install bundler:2.1.4`,
+ `cd /mnt/data1/ruby-pg/ && sudo gem install bundler:2.4.9`,
); err != nil {
t.Fatal(err)
}
diff --git a/pkg/kv/kvserver/BUILD.bazel b/pkg/kv/kvserver/BUILD.bazel
index bcf96f61a8d0..e1ff00259ba2 100644
--- a/pkg/kv/kvserver/BUILD.bazel
+++ b/pkg/kv/kvserver/BUILD.bazel
@@ -162,7 +162,6 @@ go_library(
"//pkg/kv/kvserver/multiqueue",
"//pkg/kv/kvserver/raftentry",
"//pkg/kv/kvserver/raftlog",
- "//pkg/kv/kvserver/rafttrace",
"//pkg/kv/kvserver/rangefeed",
"//pkg/kv/kvserver/rditer",
"//pkg/kv/kvserver/readsummary",
@@ -423,6 +422,7 @@ go_test(
"//pkg/kv/kvserver/kvflowcontrol/kvflowdispatch",
"//pkg/kv/kvserver/kvflowcontrol/kvflowinspectpb",
"//pkg/kv/kvserver/kvflowcontrol/node_rac2",
+ "//pkg/kv/kvserver/kvflowcontrol/rac2",
"//pkg/kv/kvserver/kvflowcontrol/replica_rac2",
"//pkg/kv/kvserver/kvserverbase",
"//pkg/kv/kvserver/kvserverpb",
@@ -439,7 +439,6 @@ go_test(
"//pkg/kv/kvserver/protectedts/ptutil",
"//pkg/kv/kvserver/raftentry",
"//pkg/kv/kvserver/raftlog",
- "//pkg/kv/kvserver/rafttrace",
"//pkg/kv/kvserver/raftutil",
"//pkg/kv/kvserver/rangefeed",
"//pkg/kv/kvserver/rditer",
diff --git a/pkg/kv/kvserver/client_merge_test.go b/pkg/kv/kvserver/client_merge_test.go
index 2dd80ea16f67..ae0b218488d4 100644
--- a/pkg/kv/kvserver/client_merge_test.go
+++ b/pkg/kv/kvserver/client_merge_test.go
@@ -3171,7 +3171,6 @@ func TestMergeQueueWithExternalFiles(t *testing.T) {
store, err := s.GetStores().(*kvserver.Stores).GetStore(s.GetFirstStoreID())
require.NoError(t, err)
- store.SetMergeQueueActive(true)
if skipExternal {
verifyUnmergedSoon(t, store, lhsDesc.StartKey, rhsDesc.StartKey)
} else {
@@ -4293,6 +4292,11 @@ func TestStoreRangeMergeDuringShutdown(t *testing.T) {
func verifyMergedSoon(t *testing.T, store *kvserver.Store, lhsStartKey, rhsStartKey roachpb.RKey) {
t.Helper()
+ store.SetMergeQueueActive(true)
+ defer func() {
+ store.SetMergeQueueActive(false)
+ store.MustForceMergeScanAndProcess() // drain any merges that might already be queued
+ }()
testutils.SucceedsSoon(t, func() error {
store.MustForceMergeScanAndProcess()
repl := store.LookupReplica(rhsStartKey)
@@ -4310,6 +4314,11 @@ func verifyUnmergedSoon(
t *testing.T, store *kvserver.Store, lhsStartKey, rhsStartKey roachpb.RKey,
) {
t.Helper()
+ store.SetMergeQueueActive(true)
+ defer func() {
+ store.SetMergeQueueActive(false)
+ store.MustForceMergeScanAndProcess() // drain any merges that might already be queued
+ }()
testutils.SucceedsSoon(t, func() error {
store.MustForceMergeScanAndProcess()
repl := store.LookupReplica(rhsStartKey)
@@ -4344,9 +4353,6 @@ func TestMergeQueue(t *testing.T) {
WallClock: manualClock,
DefaultZoneConfigOverride: &zoneConfig,
},
- Store: &kvserver.StoreTestingKnobs{
- DisableScanner: true,
- },
},
},
})
@@ -4354,11 +4360,6 @@ func TestMergeQueue(t *testing.T) {
conf := zoneConfig.AsSpanConfig()
store := tc.GetFirstStoreFromServer(t, 0)
- // The cluster with manual replication disables the merge queue,
- // so we need to re-enable.
- _, err := tc.ServerConn(0).Exec(`SET CLUSTER SETTING kv.range_merge.queue.enabled = true`)
- require.NoError(t, err)
- store.SetMergeQueueActive(true)
split := func(t *testing.T, key roachpb.Key, expirationTime hlc.Timestamp) {
t.Helper()
@@ -4429,6 +4430,7 @@ func TestMergeQueue(t *testing.T) {
kvserver.SplitByLoadEnabled.Override(ctx, &s.ClusterSettings().SV, false)
}
+ store.SetMergeQueueActive(false) // reset merge queue to inactive
store.MustForceMergeScanAndProcess() // drain any merges that might already be queued
split(t, rhsStartKey.AsRawKey(), hlc.Timestamp{} /* expirationTime */)
}
@@ -4818,7 +4820,8 @@ func TestMergeQueueSeesNonVoters(t *testing.T) {
}
var clusterArgs = base.TestClusterArgs{
- // We dont want the replicate queue mucking with our test, so disable it.
+ // We don't want the replicate queue mucking with our test, so disable it.
+ // This also disables the merge queue, until it is manually enabled.
ReplicationMode: base.ReplicationManual,
ServerArgs: base.TestServerArgs{
Knobs: base.TestingKnobs{
@@ -4841,10 +4844,6 @@ func TestMergeQueueSeesNonVoters(t *testing.T) {
store, err := tc.Server(0).GetStores().(*kvserver.Stores).GetStore(1)
require.Nil(t, err)
- // We're going to split the dummy range created above with an empty
- // expiration time. Disable the merge queue before splitting so that the
- // split ranges aren't immediately merged.
- store.SetMergeQueueActive(false)
leftDesc, rightDesc := splitDummyRangeInTestCluster(
t, tc, dbName, "kv" /* tableName */, hlc.Timestamp{} /* splitExpirationTime */)
@@ -4887,7 +4886,6 @@ func TestMergeQueueSeesNonVoters(t *testing.T) {
tc.RemoveVotersOrFatal(t, rightDesc.StartKey.AsRawKey(), tc.Target(0))
rightDesc = tc.LookupRangeOrFatal(t, rightDesc.StartKey.AsRawKey())
- store.SetMergeQueueActive(true)
verifyMergedSoon(t, store, leftDesc.StartKey, rightDesc.StartKey)
})
}
@@ -4909,7 +4907,8 @@ func TestMergeQueueWithSlowNonVoterSnaps(t *testing.T) {
ctx := context.Background()
var delaySnapshotTrap atomic.Value
var clusterArgs = base.TestClusterArgs{
- // We dont want the replicate queue mucking with our test, so disable it.
+ // We don't want the replicate queue mucking with our test, so disable it.
+ // This also disables the merge queue, until it is manually enabled.
ReplicationMode: base.ReplicationManual,
ServerArgs: base.TestServerArgs{
Knobs: base.TestingKnobs{
@@ -4945,17 +4944,9 @@ func TestMergeQueueWithSlowNonVoterSnaps(t *testing.T) {
numNodes := 3
tc, _ := setupTestClusterWithDummyRange(t, clusterArgs, dbName, tableName, numNodes)
defer tc.Stopper().Stop(ctx)
- // We're controlling merge queue operation via
- // `store.SetMergeQueueActive`, so enable the cluster setting here.
- _, err := tc.ServerConn(0).Exec(`SET CLUSTER SETTING kv.range_merge.queue.enabled=true`)
- require.NoError(t, err)
store, err := tc.Server(0).GetStores().(*kvserver.Stores).GetStore(1)
require.Nil(t, err)
- // We're going to split the dummy range created above with an empty
- // expiration time. Disable the merge queue before splitting so that the
- // split ranges aren't immediately merged.
- store.SetMergeQueueActive(false)
leftDesc, rightDesc := splitDummyRangeInTestCluster(
t, tc, dbName, tableName, hlc.Timestamp{}, /* splitExpirationTime */
)
@@ -4972,7 +4963,6 @@ func TestMergeQueueWithSlowNonVoterSnaps(t *testing.T) {
time.Sleep(5 * time.Second)
return nil
})
- store.SetMergeQueueActive(true)
verifyMergedSoon(t, store, leftDesc.StartKey, rightDesc.StartKey)
}
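Note on the merge-queue test changes above: verifyMergedSoon and verifyUnmergedSoon now own the queue's lifecycle, enabling it only for the duration of the check and disabling and draining it on exit. A minimal sketch of that pattern, using a hypothetical withMergeQueueEnabled helper that is not part of this patch:

package kvserver_test

import (
	"testing"

	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
	"github.com/cockroachdb/cockroach/pkg/testutils"
)

// withMergeQueueEnabled illustrates the pattern used by verifyMergedSoon and
// verifyUnmergedSoon above: the merge queue is active only while the check
// runs, and any merges queued in the meantime are drained before returning so
// they cannot fire later in the test.
func withMergeQueueEnabled(t *testing.T, store *kvserver.Store, check func() error) {
	t.Helper()
	store.SetMergeQueueActive(true)
	defer func() {
		store.SetMergeQueueActive(false)
		store.MustForceMergeScanAndProcess() // drain anything already queued
	}()
	testutils.SucceedsSoon(t, check)
}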
diff --git a/pkg/kv/kvserver/client_raft_log_queue_test.go b/pkg/kv/kvserver/client_raft_log_queue_test.go
index a988d1a02970..9a466877c684 100644
--- a/pkg/kv/kvserver/client_raft_log_queue_test.go
+++ b/pkg/kv/kvserver/client_raft_log_queue_test.go
@@ -20,7 +20,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/kv/kvpb"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
- "github.com/cockroachdb/cockroach/pkg/kv/kvserver/rafttrace"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/rpc"
"github.com/cockroachdb/cockroach/pkg/rpc/nodedialer"
@@ -34,8 +33,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
- "github.com/cockroachdb/cockroach/pkg/util/tracing"
- "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb"
"github.com/cockroachdb/errors"
"github.com/cockroachdb/pebble/vfs"
"github.com/gogo/protobuf/proto"
@@ -135,65 +132,6 @@ func TestRaftLogQueue(t *testing.T) {
}
}
-func TestRaftTracing(t *testing.T) {
- defer leaktest.AfterTest(t)()
- defer log.Scope(t).Close(t)
-
- // TODO(baptist): Remove this once we change the default to be enabled.
- st := cluster.MakeTestingClusterSettings()
- rafttrace.MaxConcurrentRaftTraces.Override(context.Background(), &st.SV, 10)
-
- tc := testcluster.StartTestCluster(t, 3, base.TestClusterArgs{
- ReplicationMode: base.ReplicationManual,
- ServerArgs: base.TestServerArgs{
- Settings: st,
- RaftConfig: base.RaftConfig{
- RangeLeaseDuration: 24 * time.Hour, // disable lease moves
- RaftElectionTimeoutTicks: 1 << 30, // disable elections
- },
- },
- })
- defer tc.Stopper().Stop(context.Background())
- store := tc.GetFirstStoreFromServer(t, 0)
-
- // Write a single value to ensure we have a leader on n1.
- key := tc.ScratchRange(t)
- _, pErr := kv.SendWrapped(context.Background(), store.TestSender(), putArgs(key, []byte("value")))
- require.NoError(t, pErr.GoError())
- require.NoError(t, tc.WaitForSplitAndInitialization(key))
- // Set to have 3 voters.
- tc.AddVotersOrFatal(t, key, tc.Targets(1, 2)...)
- tc.WaitForVotersOrFatal(t, key, tc.Targets(1, 2)...)
-
- for i := 0; i < 100; i++ {
- var finish func() tracingpb.Recording
- ctx := context.Background()
- if i == 50 {
- // Trace a random request on a "client" tracer.
- ctx, finish = tracing.ContextWithRecordingSpan(ctx, store.GetStoreConfig().Tracer(), "test")
- }
- _, pErr := kv.SendWrapped(ctx, store.TestSender(), putArgs(key, []byte(fmt.Sprintf("value-%d", i))))
- require.NoError(t, pErr.GoError())
- // Note that this is the clients span, there may be additional logs created after the span is returned.
- if finish != nil {
- output := finish().String()
- // NB: It is hard to get all the messages in an expected order. We
- // simply ensure some of the key messages are returned. Also note
- // that we want to make sure that the logs are not reported against
- // the tracing library, but the line that called into it.
- expectedMessages := []string{
- `replica_proposal_buf.* flushing proposal to Raft`,
- `replica_proposal_buf.* registering local trace`,
- `replica_raft.* 1->2 MsgApp`,
- `replica_raft.* 1->3 MsgApp`,
- `replica_raft.* AppendThread->1 MsgStorageAppendResp`,
- `ack-ing replication success to the client`,
- }
- require.NoError(t, testutils.MatchInOrder(output, expectedMessages...))
- }
- }
-}
-
// TestCrashWhileTruncatingSideloadedEntries emulates a process crash in the
// middle of applying a raft log truncation command that removes some entries
// from the sideloaded storage. The test expects that storage remains in a
diff --git a/pkg/kv/kvserver/flow_control_integration_test.go b/pkg/kv/kvserver/flow_control_integration_test.go
index ff37488205d4..f07f1b02b5bd 100644
--- a/pkg/kv/kvserver/flow_control_integration_test.go
+++ b/pkg/kv/kvserver/flow_control_integration_test.go
@@ -22,6 +22,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvflowcontrol"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvflowcontrol/kvflowinspectpb"
+ "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvflowcontrol/rac2"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/server"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
@@ -2253,6 +2254,10 @@ func TestFlowControlBasicV2(t *testing.T) {
n1 := sqlutils.MakeSQLRunner(tc.ServerConn(0))
h.waitForConnectedStreams(ctx, desc.RangeID, 3, 0 /* serverIdx */)
+ // Reset the token metrics, since a send queue may have instantly
+ // formed when adding one of the replicas, before being quickly
+ // drained.
+ h.resetV2TokenMetrics(ctx)
h.comment(`-- Flow token metrics, before issuing the 1MiB replicated write.`)
h.query(n1, v2FlowTokensQueryStr)
@@ -2342,6 +2347,10 @@ func TestFlowControlRangeSplitMergeV2(t *testing.T) {
require.NoError(t, err)
h.waitForConnectedStreams(ctx, desc.RangeID, 3, 0 /* serverIdx */)
+ // Reset the token metrics, since a send queue may have instantly
+ // formed when adding one of the replicas, before being quickly
+ // drained.
+ h.resetV2TokenMetrics(ctx)
h.log("sending put request to pre-split range")
h.put(ctx, k, 1<<20 /* 1MiB */, testFlowModeToPri(mode))
h.log("sent put request to pre-split range")
@@ -2464,6 +2473,10 @@ func TestFlowControlBlockedAdmissionV2(t *testing.T) {
require.NoError(t, err)
h.enableVerboseRaftMsgLoggingForRange(desc.RangeID)
h.waitForConnectedStreams(ctx, desc.RangeID, 3, 0 /* serverIdx */)
+ // Reset the token metrics, since a send queue may have instantly
+ // formed when adding one of the replicas, before being quickly
+ // drained.
+ h.resetV2TokenMetrics(ctx)
h.comment(`-- (Issuing 5 1MiB, 3x replicated write that's not admitted.)`)
h.log("sending put requests")
@@ -2579,6 +2592,10 @@ func TestFlowControlAdmissionPostSplitMergeV2(t *testing.T) {
require.NoError(t, err)
h.waitForConnectedStreams(ctx, desc.RangeID, 3, 0 /* serverIdx */)
+ // Reset the token metrics, since a send queue may have instantly
+ // formed when adding one of the replicas, before being quickly
+ // drained.
+ h.resetV2TokenMetrics(ctx)
h.log("sending put request to pre-split range")
h.put(ctx, k, 1<<20 /* 1MiB */, testFlowModeToPri(mode))
@@ -2722,6 +2739,10 @@ func TestFlowControlCrashedNodeV2(t *testing.T) {
require.NoError(t, err)
tc.TransferRangeLeaseOrFatal(t, desc, tc.Target(0))
h.waitForConnectedStreams(ctx, desc.RangeID, 2, 0 /* serverIdx */)
+ // Reset the token metrics, since a send queue may have instantly
+ // formed when adding one of the replicas, before being quickly
+ // drained.
+ h.resetV2TokenMetrics(ctx)
h.comment(`-- (Issuing 5x1MiB, 2x replicated writes that are not admitted.)`)
h.log("sending put requests")
@@ -2870,6 +2891,10 @@ func TestFlowControlRaftSnapshotV2(t *testing.T) {
repl := store.LookupReplica(roachpb.RKey(k))
require.NotNil(t, repl)
h.waitForConnectedStreams(ctx, repl.RangeID, 5, 0 /* serverIdx */)
+ // Reset the token metrics, since a send queue may have instantly
+ // formed when adding one of the replicas, before being quickly
+ // drained.
+ h.resetV2TokenMetrics(ctx)
// Set up a key to replicate across the cluster. We're going to modify this
// key and truncate the raft logs from that command after killing one of the
@@ -3085,6 +3110,10 @@ func TestFlowControlRaftMembershipV2(t *testing.T) {
desc, err := tc.LookupRange(k)
require.NoError(t, err)
h.waitForConnectedStreams(ctx, desc.RangeID, 3, 0 /* serverIdx */)
+ // Reset the token metrics, since a send queue may have instantly
+ // formed when adding one of the replicas, before being quickly
+ // drained.
+ h.resetV2TokenMetrics(ctx)
h.comment(`-- (Issuing 1x1MiB, 3x replicated write that's not admitted.)`)
h.put(ctx, k, 1<<20 /* 1MiB */, testFlowModeToPri(mode))
@@ -3224,6 +3253,10 @@ func TestFlowControlRaftMembershipRemoveSelfV2(t *testing.T) {
// Make sure the lease is on n1 and that we're triply connected.
tc.TransferRangeLeaseOrFatal(t, desc, tc.Target(0))
h.waitForConnectedStreams(ctx, desc.RangeID, 3, 0 /* serverIdx */)
+ // Reset the token metrics, since a send queue may have instantly
+ // formed when adding one of the replicas, before being quickly
+ // drained.
+ h.resetV2TokenMetrics(ctx)
h.comment(`-- (Issuing 1x1MiB, 3x replicated write that's not admitted.)`)
h.put(ctx, k, 1<<20 /* 1MiB */, testFlowModeToPri(mode))
@@ -3353,6 +3386,10 @@ func TestFlowControlClassPrioritizationV2(t *testing.T) {
desc, err := tc.LookupRange(k)
require.NoError(t, err)
h.waitForConnectedStreams(ctx, desc.RangeID, 3, 0 /* serverIdx */)
+ // Reset the token metrics, since a send queue may have instantly
+ // formed when adding one of the replicas, before being quickly
+ // drained.
+ h.resetV2TokenMetrics(ctx)
h.comment(`-- (Issuing 1x1MiB, 3x replicated elastic write that's not admitted.)`)
h.put(ctx, k, 1<<20 /* 1MiB */, testFlowModeToPri(mode))
@@ -3469,6 +3506,10 @@ func TestFlowControlUnquiescedRangeV2(t *testing.T) {
n1 := sqlutils.MakeSQLRunner(tc.ServerConn(0))
h.waitForConnectedStreams(ctx, desc.RangeID, 3, 0 /* serverIdx */)
+ // Reset the token metrics, since a send queue may have instantly
+ // formed when adding one of the replicas, before being quickly
+ // drained.
+ h.resetV2TokenMetrics(ctx)
h.comment(`-- (Issuing 1x1MiB, 3x replicated elastic write that's not admitted.)`)
h.put(ctx, k, 1<<20 /* 1MiB */, admissionpb.BulkNormalPri)
@@ -3571,6 +3612,10 @@ func TestFlowControlTransferLeaseV2(t *testing.T) {
desc, err := tc.LookupRange(k)
require.NoError(t, err)
h.waitForConnectedStreams(ctx, desc.RangeID, 3, 0 /* serverIdx */)
+ // Reset the token metrics, since a send queue may have instantly
+ // formed when adding one of the replicas, before being quickly
+ // drained.
+ h.resetV2TokenMetrics(ctx)
h.comment(`-- (Issuing 1x1MiB, 3x replicated write that's not admitted.)`)
h.put(ctx, k, 1<<20 /* 1MiB */, testFlowModeToPri(mode))
@@ -3664,6 +3709,10 @@ func TestFlowControlLeaderNotLeaseholderV2(t *testing.T) {
desc, err := tc.LookupRange(k)
require.NoError(t, err)
h.waitForConnectedStreams(ctx, desc.RangeID, 3, 0 /* serverIdx */)
+ // Reset the token metrics, since a send queue may have instantly
+ // formed when adding one of the replicas, before being quickly
+ // drained.
+ h.resetV2TokenMetrics(ctx)
h.comment(`-- (Issuing 1x1MiB, 3x replicated write that's not admitted.)`)
h.put(ctx, k, 1<<20 /* 1MiB */, testFlowModeToPri(mode))
@@ -3780,6 +3829,10 @@ func TestFlowControlGranterAdmitOneByOneV2(t *testing.T) {
desc, err := tc.LookupRange(k)
require.NoError(t, err)
h.waitForConnectedStreams(ctx, desc.RangeID, 3, 0 /* serverIdx */)
+ // Reset the token metrics, since a send queue may have instantly
+ // formed when adding one of the replicas, before being quickly
+ // drained.
+ h.resetV2TokenMetrics(ctx)
h.comment(`-- (Issuing 1024*1KiB, 3x replicated writes that are not admitted.)`)
h.log("sending put requests")
@@ -4865,6 +4918,18 @@ func (h *flowControlTestHelper) enableVerboseRaftMsgLoggingForRange(rangeID roac
}
}
+func (h *flowControlTestHelper) resetV2TokenMetrics(ctx context.Context) {
+ for _, server := range h.tc.Servers {
+ require.NoError(h.t, server.GetStores().(*kvserver.Stores).VisitStores(func(s *kvserver.Store) error {
+ s.GetStoreConfig().KVFlowStreamTokenProvider.Metrics().(*rac2.TokenMetrics).TestingClear()
+ _, err := s.ComputeMetricsPeriodically(ctx, nil, 0)
+ require.NoError(h.t, err)
+ s.GetStoreConfig().KVFlowStreamTokenProvider.UpdateMetricGauges()
+ return nil
+ }))
+ }
+}
+
// makeV2EnabledTestFileName is a utility function which returns an updated
// filename for the testdata file based on the v2EnabledWhenLeaderLevel.
func makeV2EnabledTestFileName(
diff --git a/pkg/kv/kvserver/kvflowcontrol/rac2/metrics.go b/pkg/kv/kvserver/kvflowcontrol/rac2/metrics.go
index eb515d41da18..de2108c567be 100644
--- a/pkg/kv/kvserver/kvflowcontrol/rac2/metrics.go
+++ b/pkg/kv/kvserver/kvflowcontrol/rac2/metrics.go
@@ -197,6 +197,34 @@ func NewTokenMetrics() *TokenMetrics {
return m
}
+// TestingClear is used in tests to reset the metrics.
+func (m *TokenMetrics) TestingClear() {
+ // NB: we only clear the counter metrics, as the stream metrics are gauges.
+ for _, typ := range []TokenType{
+ EvalToken,
+ SendToken,
+ } {
+ for _, wc := range []admissionpb.WorkClass{
+ admissionpb.RegularWorkClass,
+ admissionpb.ElasticWorkClass,
+ } {
+ m.CounterMetrics[typ].Deducted[wc].Clear()
+ m.CounterMetrics[typ].Returned[wc].Clear()
+ m.CounterMetrics[typ].Unaccounted[wc].Clear()
+ m.CounterMetrics[typ].Disconnected[wc].Clear()
+ if typ == SendToken {
+ m.CounterMetrics[typ].SendQueue[0].ForceFlushDeducted.Clear()
+ for _, wc := range []admissionpb.WorkClass{
+ admissionpb.RegularWorkClass,
+ admissionpb.ElasticWorkClass,
+ } {
+ m.CounterMetrics[typ].SendQueue[0].PreventionDeducted[wc].Clear()
+ }
+ }
+ }
+ }
+}
+
type TokenCounterMetrics struct {
Deducted [admissionpb.NumWorkClasses]*metric.Counter
Returned [admissionpb.NumWorkClasses]*metric.Counter
diff --git a/pkg/kv/kvserver/kvflowcontrol/rac2/range_controller.go b/pkg/kv/kvserver/kvflowcontrol/rac2/range_controller.go
index 7458a2ac7beb..b2ccd2c9a52f 100644
--- a/pkg/kv/kvserver/kvflowcontrol/rac2/range_controller.go
+++ b/pkg/kv/kvserver/kvflowcontrol/rac2/range_controller.go
@@ -71,7 +71,7 @@ type RangeController interface {
//
// Requires replica.raftMu to be held.
HandleSchedulerEventRaftMuLocked(
- ctx context.Context, mode RaftMsgAppMode, logSnapshot raft.LogSnapshot)
+ ctx context.Context, mode RaftMsgAppMode, logSnapshot RaftLogSnapshot)
// AdmitRaftMuLocked handles the notification about the given replica's
// admitted vector change. No-op if the replica is not known, or the admitted
// vector is stale (either in Term, or the indices).
@@ -152,6 +152,31 @@ type RaftInterface interface {
SendMsgAppRaftMuLocked(replicaID roachpb.ReplicaID, slice raft.LogSlice) (raftpb.Message, bool)
}
+// RaftLogSnapshot abstracts raft.LogSnapshot.
+type RaftLogSnapshot interface {
+ // LogSlice returns a slice containing a prefix of [start, end). It must
+ // only be called in MsgAppPull mode for followers. The maxSize is required
+ // to be > 0.
+ //
+ // Returns the longest prefix of entries in the [start, end) interval such
+ // that the total size of the entries does not exceed maxSize. The limit can
+ // only be exceeded if the first entry is larger than maxSize, in which case
+ // only this first entry is returned.
+ //
+ // Returns an error if the log is truncated beyond the start index, or there
+ // is some other transient problem.
+ //
+ // NB: the [start, end) interval is different from RawNode.LogSlice which
+ // accepts an open-closed interval.
+ //
+ // TODO(#132789): change the semantics so that maxSize can be exceeded not
+ // only if the first entry is large. It should be ok to exceed maxSize if the
+ // last entry makes it so. In the underlying storage implementation, we have
+ // paid the cost of fetching this entry anyway, so there is no need to drop it
+ // from the result.
+ LogSlice(start, end uint64, maxSize uint64) (raft.LogSlice, error)
+}
+
// RaftMsgAppMode specifies how Raft (at the leader) generates MsgApps. In
// both modes, Raft knows that (Match(i), Next(i)) are in-flight for a
// follower i.
@@ -347,7 +372,7 @@ type RaftEvent struct {
MsgApps map[roachpb.ReplicaID][]raftpb.Message
// LogSnapshot must be populated on the leader, when operating in MsgAppPull
// mode. It is used (along with RaftInterface) to construct MsgApps.
- LogSnapshot raft.LogSnapshot
+ LogSnapshot RaftLogSnapshot
// ReplicasStateInfo contains the state of all replicas. This is used to
// determine if the state of a replica has changed, and if so, to update the
// flow control state. It also informs the RangeController of a replica's
@@ -378,7 +403,7 @@ func RaftEventFromMsgStorageAppendAndMsgApps(
replicaID roachpb.ReplicaID,
appendMsg raftpb.Message,
outboundMsgs []raftpb.Message,
- logSnapshot raft.LogSnapshot,
+ logSnapshot RaftLogSnapshot,
msgAppScratch map[roachpb.ReplicaID][]raftpb.Message,
replicaStateInfoMap map[roachpb.ReplicaID]ReplicaStateInfo,
) RaftEvent {
@@ -794,7 +819,7 @@ type raftEventForReplica struct {
newEntries []entryFCState
sendingEntries []entryFCState
recreateSendStream bool
- logSnapshot raft.LogSnapshot
+ logSnapshot RaftLogSnapshot
}
// raftEventAppendState is the general state computed from RaftEvent that is
@@ -828,7 +853,7 @@ func constructRaftEventForReplica(
latestReplicaStateInfo ReplicaStateInfo,
existingSendStreamState existingSendStreamState,
msgApps []raftpb.Message,
- logSnapshot raft.LogSnapshot,
+ logSnapshot RaftLogSnapshot,
scratchSendingEntries []entryFCState,
) (_ raftEventForReplica, scratch []entryFCState) {
firstNewEntryIndex, lastNewEntryIndex := uint64(math.MaxUint64), uint64(math.MaxUint64)
@@ -1273,7 +1298,7 @@ func (rc *rangeController) computeVoterDirectives(
// HandleSchedulerEventRaftMuLocked implements RangeController.
func (rc *rangeController) HandleSchedulerEventRaftMuLocked(
- ctx context.Context, mode RaftMsgAppMode, logSnapshot raft.LogSnapshot,
+ ctx context.Context, mode RaftMsgAppMode, logSnapshot RaftLogSnapshot,
) {
var scheduledScratch [5]*replicaState
// scheduled will contain all the replicas in scheduledMu.replicas, filtered
@@ -2273,7 +2298,7 @@ func (rs *replicaState) handleReadyStateRaftMuLocked(
//
// closedReplica => !scheduleAgain.
func (rs *replicaState) scheduledRaftMuLocked(
- ctx context.Context, mode RaftMsgAppMode, logSnapshot raft.LogSnapshot,
+ ctx context.Context, mode RaftMsgAppMode, logSnapshot RaftLogSnapshot,
) (scheduleAgain bool, updateWaiterSets bool) {
if rs.desc.ReplicaID == rs.parent.opts.LocalReplicaID {
panic("scheduled called on the leader replica")
@@ -2328,7 +2353,7 @@ func (rs *replicaState) scheduledRaftMuLocked(
// entries not subject to flow control will be tiny. We of course return the
// unused tokens for entries not subject to flow control.
slice, err := logSnapshot.LogSlice(
- rss.mu.sendQueue.indexToSend-1, rss.mu.sendQueue.nextRaftIndex-1, uint64(bytesToSend))
+ rss.mu.sendQueue.indexToSend, rss.mu.sendQueue.nextRaftIndex, uint64(bytesToSend))
var msg raftpb.Message
if err == nil {
var sent bool
@@ -2530,7 +2555,7 @@ func (rss *replicaSendStream) handleReadyEntriesRaftMuAndStreamLocked(
// NB: this will not do IO since everything here is in the unstable log
// (see raft.LogSnapshot.unstable).
slice, err := event.logSnapshot.LogSlice(
- event.sendingEntries[0].id.index-1, event.sendingEntries[n-1].id.index, math.MaxInt64)
+ event.sendingEntries[0].id.index, event.sendingEntries[n-1].id.index+1, math.MaxInt64)
if err != nil {
return false, err
}
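The RaftLogSnapshot interface introduced above has a narrow contract: a half-open [start, end) interval, and a maxSize that may only be exceeded by the first returned entry. A minimal in-memory sketch of an implementation honoring that contract (illustrative only; memLogSnapshot is not part of this patch, and a similar testing implementation appears in the test file below):

package rac2

import (
	"github.com/cockroachdb/cockroach/pkg/raft"
	"github.com/cockroachdb/cockroach/pkg/raft/raftpb"
)

// memLogSnapshot is a hypothetical in-memory RaftLogSnapshot backed by a
// slice of entries sorted by index.
type memLogSnapshot struct {
	entries []raftpb.Entry
}

func (m memLogSnapshot) LogSlice(start, end uint64, maxSize uint64) (raft.LogSlice, error) {
	var size uint64
	var out []raftpb.Entry
	for _, e := range m.entries {
		if e.Index < start || e.Index >= end {
			continue
		}
		size += uint64(e.Size())
		// maxSize may only be exceeded by the first returned entry.
		if size > maxSize && len(out) > 0 {
			break
		}
		out = append(out, e)
		if size >= maxSize {
			break
		}
	}
	return raft.MakeLogSlice(out), nil
}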
diff --git a/pkg/kv/kvserver/kvflowcontrol/rac2/range_controller_test.go b/pkg/kv/kvserver/kvflowcontrol/rac2/range_controller_test.go
index 74431bd41e10..0758567a02fb 100644
--- a/pkg/kv/kvserver/kvflowcontrol/rac2/range_controller_test.go
+++ b/pkg/kv/kvserver/kvflowcontrol/rac2/range_controller_test.go
@@ -9,7 +9,6 @@ import (
"cmp"
"context"
"fmt"
- "math"
"slices"
"sort"
"strconv"
@@ -322,9 +321,6 @@ func (s *testingRCState) getOrInitRange(
testRC.mu.evals = make(map[string]*testingRCEval)
testRC.mu.outstandingReturns = make(map[roachpb.ReplicaID]kvflowcontrol.Tokens)
testRC.mu.quorumPosition = kvflowcontrolpb.RaftLogPosition{Term: 1, Index: 0}
- _ = testRC.raftLog.ApplySnapshot(raftpb.Snapshot{
- Metadata: raftpb.SnapshotMetadata{Index: r.nextRaftIndex - 1},
- })
options := RangeControllerOptions{
RangeID: r.rangeID,
TenantID: r.tenantID,
@@ -374,7 +370,7 @@ type testingRCRange struct {
// snapshots contain snapshots of the tracker state for different replicas,
// at various points in time. It is used in TestUsingSimulation.
snapshots []testingTrackerSnapshot
- raftLog raft.MemoryStorage
+ entries []raftpb.Entry
mu struct {
syncutil.Mutex
@@ -437,8 +433,28 @@ func (r *testingRCRange) ScheduleControllerEvent(rangeID roachpb.RangeID) {
r.scheduleControllerEventCount.Add(1)
}
-func (r *testingRCRange) logSnapshot() raft.LogSnapshot {
- return raft.MakeLogSnapshot(&r.raftLog)
+func (r *testingRCRange) LogSlice(start, end uint64, maxSize uint64) (raft.LogSlice, error) {
+ if start >= end {
+ panic("start >= end")
+ }
+ var size uint64
+ var entries []raftpb.Entry
+ for _, entry := range r.entries {
+ if entry.Index < start || entry.Index >= end {
+ continue
+ }
+ size += uint64(entry.Size())
+ // Allow exceeding the size limit only if this is the first entry.
+ if size > maxSize && len(entries) != 0 {
+ break
+ }
+ entries = append(entries, entry)
+ if size >= maxSize {
+ break
+ }
+ }
+ // TODO(pav-kv): use a real LogSnapshot and construct a correct LogSlice.
+ return raft.MakeLogSlice(entries), nil
}
func (r *testingRCRange) SendMsgAppRaftMuLocked(
@@ -1209,20 +1225,17 @@ func TestRangeController(t *testing.T) {
mode = MsgAppPull
}
for _, event := range parseRaftEvents(t, d.Input) {
- entries := make([]raftpb.Entry, len(event.entries))
- for i, entry := range event.entries {
- entries[i] = testingCreateEntry(t, entry)
- }
testRC := state.ranges[event.rangeID]
- require.NoError(t, testRC.raftLog.Append(entries))
-
raftEvent := RaftEvent{
MsgAppMode: mode,
- Entries: entries,
+ Entries: make([]raftpb.Entry, len(event.entries)),
MsgApps: map[roachpb.ReplicaID][]raftpb.Message{},
- LogSnapshot: testRC.logSnapshot(),
+ LogSnapshot: testRC,
ReplicasStateInfo: state.ranges[event.rangeID].replicasStateInfo(),
}
+ for i, entry := range event.entries {
+ raftEvent.Entries[i] = testingCreateEntry(t, entry)
+ }
msgApp := raftpb.Message{
Type: raftpb.MsgApp,
To: 0,
@@ -1230,6 +1243,7 @@ func TestRangeController(t *testing.T) {
// suffix of entries that were previously appended, down below.
Entries: nil,
}
+ testRC.entries = append(testRC.entries, raftEvent.Entries...)
func() {
testRC.mu.Lock()
defer testRC.mu.Unlock()
@@ -1246,9 +1260,11 @@ func TestRangeController(t *testing.T) {
} else {
fromIndex := event.sendingEntryRange[replicaID].fromIndex
toIndex := event.sendingEntryRange[replicaID].toIndex
- entries, err := testRC.raftLog.Entries(fromIndex, toIndex+1, math.MaxUint64)
- require.NoError(t, err)
- msgApp.Entries = entries
+ for _, entry := range testRC.entries {
+ if entry.Index >= fromIndex && entry.Index <= toIndex {
+ msgApp.Entries = append(msgApp.Entries, entry)
+ }
+ }
}
raftEvent.MsgApps[replicaID] = append([]raftpb.Message(nil), msgApp)
}
@@ -1298,7 +1314,7 @@ func TestRangeController(t *testing.T) {
if d.HasArg("push-mode") {
mode = MsgAppPush
}
- testRC.rc.HandleSchedulerEventRaftMuLocked(ctx, mode, testRC.logSnapshot())
+ testRC.rc.HandleSchedulerEventRaftMuLocked(ctx, mode, testRC)
// Sleep for a bit to allow any timers to fire.
time.Sleep(20 * time.Millisecond)
return state.sendStreamString(roachpb.RangeID(rangeID))
@@ -1549,6 +1565,12 @@ func testingFirst(args ...interface{}) interface{} {
return nil
}
+type testLogSnapshot struct{}
+
+func (testLogSnapshot) LogSlice(start, end uint64, maxSize uint64) (raft.LogSlice, error) {
+ return raft.LogSlice{}, nil
+}
+
func TestRaftEventFromMsgStorageAppendAndMsgAppsBasic(t *testing.T) {
// raftpb.Entry and raftpb.Message are only partially populated below, which
// could be improved in the future.
@@ -1591,10 +1613,10 @@ func TestRaftEventFromMsgStorageAppendAndMsgAppsBasic(t *testing.T) {
},
}
msgAppScratch := map[roachpb.ReplicaID][]raftpb.Message{}
- logSnap := raft.LogSnapshot{}
+ var logSnap testLogSnapshot
infoMap := map[roachpb.ReplicaID]ReplicaStateInfo{}
checkSnapAndMap := func(event RaftEvent) {
- require.Equal(t, logSnap, event.LogSnapshot)
+ require.Equal(t, logSnap, event.LogSnapshot.(testLogSnapshot))
require.Equal(t, infoMap, event.ReplicasStateInfo)
}
@@ -1610,7 +1632,7 @@ func TestRaftEventFromMsgStorageAppendAndMsgAppsBasic(t *testing.T) {
event = RaftEventFromMsgStorageAppendAndMsgApps(
MsgAppPush, 20, raftpb.Message{}, nil, logSnap, msgAppScratch, infoMap)
checkSnapAndMap(event)
- event.LogSnapshot = raft.LogSnapshot{}
+ event.LogSnapshot = nil
event.ReplicasStateInfo = nil
require.Equal(t, RaftEvent{}, event)
// Outbound msgs contains no MsgApps for a follower, since the only MsgApp
@@ -2262,7 +2284,7 @@ func TestConstructRaftEventForReplica(t *testing.T) {
tc.latestReplicaStateInfo,
tc.existingSendStreamState,
tc.msgApps,
- raft.LogSnapshot{},
+ nil,
tc.scratchSendingEntries,
)
})
@@ -2274,7 +2296,7 @@ func TestConstructRaftEventForReplica(t *testing.T) {
tc.latestReplicaStateInfo,
tc.existingSendStreamState,
tc.msgApps,
- raft.LogSnapshot{},
+ nil,
tc.scratchSendingEntries,
)
require.Equal(t, tc.expectedRaftEventReplica, gotRaftEventReplica)
diff --git a/pkg/kv/kvserver/kvflowcontrol/rac2/store_stream.go b/pkg/kv/kvserver/kvflowcontrol/rac2/store_stream.go
index a2d675e8513e..ef6200b4b3be 100644
--- a/pkg/kv/kvserver/kvflowcontrol/rac2/store_stream.go
+++ b/pkg/kv/kvserver/kvflowcontrol/rac2/store_stream.go
@@ -541,8 +541,8 @@ func (w *sendStreamTokenWatcher) run(_ context.Context) {
select {
case <-w.stopper.ShouldQuiesce():
return
- case <-handle.waitChannel():
- if handle.confirmHaveTokensAndUnblockNextWaiter() {
+ case <-handle.WaitChannel():
+ if handle.ConfirmHaveTokensAndUnblockNextWaiter() {
break waiting
}
}
diff --git a/pkg/kv/kvserver/kvflowcontrol/rac2/token_counter.go b/pkg/kv/kvserver/kvflowcontrol/rac2/token_counter.go
index a046c0667635..c371652109b1 100644
--- a/pkg/kv/kvserver/kvflowcontrol/rac2/token_counter.go
+++ b/pkg/kv/kvserver/kvflowcontrol/rac2/token_counter.go
@@ -22,6 +22,40 @@ import (
"github.com/cockroachdb/redact"
)
+// TokenWaitingHandle is the interface for waiting for positive tokens from a
+// token counter.
+//
+// TODO(sumeer): remove this interface since there is only one implementation.
+type TokenWaitingHandle interface {
+ // WaitChannel is the channel that will be signaled if tokens are possibly
+ // available. If signaled, the caller must call
+ // ConfirmHaveTokensAndUnblockNextWaiter. There is no guarantee of tokens
+ // being available after this channel is signaled, just that tokens were
+ // available recently. A typical usage pattern is:
+ //
+ // for {
+ // select {
+ // case <-handle.WaitChannel():
+ // if handle.ConfirmHaveTokensAndUnblockNextWaiter() {
+ // break
+ // }
+ // }
+ // }
+ // tokenCounter.Deduct(...)
+ //
+ // There is a possibility for races, where multiple goroutines may be
+ // signaled and deduct tokens, sending the counter into debt. These cases are
+ // acceptable, as in aggregate the counter provides pacing over time.
+ WaitChannel() <-chan struct{}
+ // ConfirmHaveTokensAndUnblockNextWaiter is called to confirm tokens are
+ // available. True is returned if tokens are available, false otherwise. If
+ // no tokens are available, the caller can resume waiting using WaitChannel.
+ ConfirmHaveTokensAndUnblockNextWaiter() bool
+ // StreamString returns a string representation of the stream. Used for
+ // tracing.
+ StreamString() string
+}
+
// tokenCounterPerWorkClass is a helper struct for implementing tokenCounter.
// tokens are protected by the mutex in tokenCounter. Operations on the
// signalCh may not be protected by that mutex -- see the comment below.
@@ -269,16 +303,15 @@ func (t *tokenCounter) limit(wc admissionpb.WorkClass) kvflowcontrol.Tokens {
return t.mu.counters[wc].limit
}
-// TokensAvailable returns true if tokens are available, in which case handle
-// is empty and should be ignored. If false, it returns a handle that may be
-// used for waiting for tokens to become available.
+// TokensAvailable returns true if tokens are available. If false, it returns
+// a handle that may be used for waiting for tokens to become available.
func (t *tokenCounter) TokensAvailable(
wc admissionpb.WorkClass,
-) (available bool, handle tokenWaitHandle) {
+) (available bool, handle TokenWaitingHandle) {
if t.tokens(wc) > 0 {
- return true, tokenWaitHandle{}
+ return true, nil
}
- return false, tokenWaitHandle{wc: wc, b: t}
+ return false, waitHandle{wc: wc, b: t}
}
// TryDeduct attempts to deduct flow tokens for the given work class. If there
@@ -325,23 +358,25 @@ func (t *tokenCounter) Return(
t.adjust(ctx, wc, tokens, flag)
}
-// tokenWaitHandle is a handle for waiting for tokens to become available from
-// a token counter.
-type tokenWaitHandle struct {
+// waitHandle is a handle for waiting for tokens to become available from a
+// token counter.
+type waitHandle struct {
wc admissionpb.WorkClass
b *tokenCounter
}
-// waitChannel is the channel that will be signaled if tokens are possibly
+var _ TokenWaitingHandle = waitHandle{}
+
+// WaitChannel is the channel that will be signaled if tokens are possibly
// available. If signaled, the caller must call
-// confirmHaveTokensAndUnblockNextWaiter. There is no guarantee of tokens being
+// ConfirmHaveTokensAndUnblockNextWaiter. There is no guarantee of tokens being
// available after this channel is signaled, just that tokens were available
// recently. A typical usage pattern is:
//
// for {
// select {
-// case <-handle.waitChannel():
-// if handle.confirmHaveTokensAndUnblockNextWaiter() {
+// case <-handle.WaitChannel():
+// if handle.ConfirmHaveTokensAndUnblockNextWaiter() {
// break
// }
// }
@@ -351,14 +386,14 @@ type tokenWaitHandle struct {
// There is a possibility for races, where multiple goroutines may be signaled
// and deduct tokens, sending the counter into debt. These cases are
// acceptable, as in aggregate the counter provides pacing over time.
-func (wh tokenWaitHandle) waitChannel() <-chan struct{} {
+func (wh waitHandle) WaitChannel() <-chan struct{} {
return wh.b.mu.counters[wh.wc].signalCh
}
-// confirmHaveTokensAndUnblockNextWaiter is called to confirm tokens are
+// ConfirmHaveTokensAndUnblockNextWaiter is called to confirm tokens are
// available. True is returned if tokens are available, false otherwise. If no
-// tokens are available, the caller can resume waiting using waitChannel.
-func (wh tokenWaitHandle) confirmHaveTokensAndUnblockNextWaiter() (haveTokens bool) {
+// tokens are available, the caller can resume waiting using WaitChannel.
+func (wh waitHandle) ConfirmHaveTokensAndUnblockNextWaiter() (haveTokens bool) {
haveTokens = wh.b.tokens(wh.wc) > 0
if haveTokens {
// Signal the next waiter if we have tokens available before returning.
@@ -367,15 +402,14 @@ func (wh tokenWaitHandle) confirmHaveTokensAndUnblockNextWaiter() (haveTokens bo
return haveTokens
}
-// streamString returns a string representation of the stream. Used for
-// tracing.
-func (wh tokenWaitHandle) streamString() string {
+// StreamString implements TokenWaitingHandle.
+func (wh waitHandle) StreamString() string {
return wh.b.stream.String()
}
type tokenWaitingHandleInfo struct {
- // Can be empty, in which case no methods should be called on it.
- handle tokenWaitHandle
+ // Can be nil, in which case the wait on this can never succeed.
+ handle TokenWaitingHandle
// requiredWait will be set for the leaseholder and leader for regular work.
// For elastic work this will be set for the aforementioned, and all replicas
// which are in StateReplicate.
@@ -457,8 +491,8 @@ func WaitForEval(
requiredWaitCount++
}
var chanValue reflect.Value
- if h.handle != (tokenWaitHandle{}) {
- chanValue = reflect.ValueOf(h.handle.waitChannel())
+ if h.handle != nil {
+ chanValue = reflect.ValueOf(h.handle.WaitChannel())
}
// Else, zero Value, so will never be selected.
scratch = append(scratch,
@@ -495,7 +529,7 @@ func WaitForEval(
return ReplicaRefreshWaitSignaled, scratch
default:
handleInfo := handles[chosen-3]
- if available := handleInfo.handle.confirmHaveTokensAndUnblockNextWaiter(); !available {
+ if available := handleInfo.handle.ConfirmHaveTokensAndUnblockNextWaiter(); !available {
// The handle was signaled but does not currently have tokens
// available. Continue waiting on this handle.
continue
@@ -503,7 +537,7 @@ func WaitForEval(
if traceIndividualWaits {
log.Eventf(ctx, "wait-for-eval: waited until %s tokens available",
- handleInfo.handle.streamString())
+ handleInfo.handle.StreamString())
}
if handleInfo.partOfQuorum {
signaledQuorumCount++
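The TokenWaitingHandle comment above describes the consumer loop in prose. Expressed as a standalone function in the same package, it looks roughly like the sketch below; waitThenDeduct is hypothetical and assumes Deduct takes kvflowcontrol.Tokens, as the tests in this patch suggest:

package rac2

import (
	"context"

	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvflowcontrol"
	"github.com/cockroachdb/cockroach/pkg/util/admission/admissionpb"
)

// waitThenDeduct waits until the handle is signaled, confirms that tokens are
// actually available (the signal is only a hint that tokens were recently
// available), and only then deducts. Races that push the counter into debt
// are acceptable per the interface comment.
func waitThenDeduct(
	ctx context.Context, t *tokenCounter, wc admissionpb.WorkClass, tokens kvflowcontrol.Tokens,
) {
	available, handle := t.TokensAvailable(wc)
	for !available {
		select {
		case <-ctx.Done():
			return
		case <-handle.WaitChannel():
			// Confirm before deducting; resume waiting if tokens are gone.
			available = handle.ConfirmHaveTokensAndUnblockNextWaiter()
		}
	}
	t.Deduct(ctx, wc, tokens, AdjNormal)
}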
diff --git a/pkg/kv/kvserver/kvflowcontrol/rac2/token_counter_test.go b/pkg/kv/kvserver/kvflowcontrol/rac2/token_counter_test.go
index 3187ca37a92c..2392a9d96094 100644
--- a/pkg/kv/kvserver/kvflowcontrol/rac2/token_counter_test.go
+++ b/pkg/kv/kvserver/kvflowcontrol/rac2/token_counter_test.go
@@ -297,7 +297,7 @@ func TestTokenCounter(t *testing.T) {
assertStateReset := func(t *testing.T) {
available, handle := counter.TokensAvailable(admissionpb.ElasticWorkClass)
require.True(t, available)
- require.Equal(t, tokenWaitHandle{}, handle)
+ require.Nil(t, handle)
require.Equal(t, limits.regular, counter.tokens(admissionpb.RegularWorkClass))
require.Equal(t, limits.elastic, counter.tokens(admissionpb.ElasticWorkClass))
}
@@ -307,11 +307,11 @@ func TestTokenCounter(t *testing.T) {
// classes.
available, handle := counter.TokensAvailable(admissionpb.RegularWorkClass)
require.True(t, available)
- require.Equal(t, tokenWaitHandle{}, handle)
+ require.Nil(t, handle)
available, handle = counter.TokensAvailable(admissionpb.ElasticWorkClass)
require.True(t, available)
- require.Equal(t, tokenWaitHandle{}, handle)
+ require.Nil(t, handle)
assertStateReset(t)
})
@@ -326,7 +326,7 @@ func TestTokenCounter(t *testing.T) {
// Now there should be no tokens available for regular work class.
available, handle := counter.TokensAvailable(admissionpb.RegularWorkClass)
require.False(t, available)
- require.NotEqual(t, tokenWaitHandle{}, handle)
+ require.NotNil(t, handle)
counter.Return(ctx, admissionpb.RegularWorkClass, limits.regular, AdjNormal)
assertStateReset(t)
})
@@ -353,18 +353,18 @@ func TestTokenCounter(t *testing.T) {
// returned.
available, handle := counter.TokensAvailable(admissionpb.RegularWorkClass)
require.False(t, available)
- require.NotEqual(t, tokenWaitHandle{}, handle)
+ require.NotNil(t, handle)
counter.Return(ctx, admissionpb.RegularWorkClass, limits.regular, AdjNormal)
// Wait on the handle to be unblocked and expect that there are tokens
// available when the wait channel is signaled.
- <-handle.waitChannel()
- haveTokens := handle.confirmHaveTokensAndUnblockNextWaiter()
+ <-handle.WaitChannel()
+ haveTokens := handle.ConfirmHaveTokensAndUnblockNextWaiter()
require.True(t, haveTokens)
// Wait on the handle to be unblocked again, this time try deducting such
// that there are no tokens available after.
counter.Deduct(ctx, admissionpb.RegularWorkClass, limits.regular, AdjNormal)
- <-handle.waitChannel()
- haveTokens = handle.confirmHaveTokensAndUnblockNextWaiter()
+ <-handle.WaitChannel()
+ haveTokens = handle.ConfirmHaveTokensAndUnblockNextWaiter()
require.False(t, haveTokens)
// Return the tokens deducted from the first wait above.
counter.Return(ctx, admissionpb.RegularWorkClass, limits.regular, AdjNormal)
@@ -394,14 +394,14 @@ func TestTokenCounter(t *testing.T) {
// available.
available, handle := counter.TokensAvailable(admissionpb.RegularWorkClass)
if !available {
- <-handle.waitChannel()
+ <-handle.WaitChannel()
// This may or may not have raced with another goroutine, there's
// no guarantee we have tokens here. If we don't have tokens here,
// the next call to TryDeduct will fail (unless someone returns
// tokens between here and that call), which is harmless. This test
// is using TokensAvailable and the returned handle to avoid
// busy-waiting.
- handle.confirmHaveTokensAndUnblockNextWaiter()
+ handle.ConfirmHaveTokensAndUnblockNextWaiter()
}
}
@@ -416,8 +416,8 @@ func TestTokenCounter(t *testing.T) {
})
}
-func (t *tokenCounter) testingHandle() tokenWaitHandle {
- return tokenWaitHandle{wc: admissionpb.RegularWorkClass, b: t}
+func (t *tokenCounter) testingHandle() waitHandle {
+ return waitHandle{wc: admissionpb.RegularWorkClass, b: t}
}
type namedTokenCounter struct {
diff --git a/pkg/kv/kvserver/kvflowcontrol/replica_rac2/processor.go b/pkg/kv/kvserver/kvflowcontrol/replica_rac2/processor.go
index 1dd2fb8af8f4..e4c457ca0db7 100644
--- a/pkg/kv/kvserver/kvflowcontrol/replica_rac2/processor.go
+++ b/pkg/kv/kvserver/kvflowcontrol/replica_rac2/processor.go
@@ -15,7 +15,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvflowcontrol/kvflowinspectpb"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvflowcontrol/rac2"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/raftlog"
- "github.com/cockroachdb/cockroach/pkg/raft"
"github.com/cockroachdb/cockroach/pkg/raft/raftpb"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/util/admission/admissionpb"
@@ -354,7 +353,7 @@ type Processor interface {
//
// raftMu is held.
ProcessSchedulerEventRaftMuLocked(
- ctx context.Context, mode rac2.RaftMsgAppMode, logSnapshot raft.LogSnapshot)
+ ctx context.Context, mode rac2.RaftMsgAppMode, logSnapshot rac2.RaftLogSnapshot)
// InspectRaftMuLocked returns a handle to inspect the state of the
// underlying range controller. It is used to power /inspectz-style debugging
@@ -1140,7 +1139,7 @@ func (p *processorImpl) AdmitForEval(
// ProcessSchedulerEventRaftMuLocked implements Processor.
func (p *processorImpl) ProcessSchedulerEventRaftMuLocked(
- ctx context.Context, mode rac2.RaftMsgAppMode, logSnapshot raft.LogSnapshot,
+ ctx context.Context, mode rac2.RaftMsgAppMode, logSnapshot rac2.RaftLogSnapshot,
) {
p.opts.Replica.RaftMuAssertHeld()
if p.destroyed {
diff --git a/pkg/kv/kvserver/kvflowcontrol/replica_rac2/processor_test.go b/pkg/kv/kvserver/kvflowcontrol/replica_rac2/processor_test.go
index 1d4db9902eb1..5efce9a4484e 100644
--- a/pkg/kv/kvserver/kvflowcontrol/replica_rac2/processor_test.go
+++ b/pkg/kv/kvserver/kvflowcontrol/replica_rac2/processor_test.go
@@ -203,7 +203,7 @@ func (c *testRangeController) HandleRaftEventRaftMuLocked(
}
func (c *testRangeController) HandleSchedulerEventRaftMuLocked(
- _ context.Context, _ rac2.RaftMsgAppMode, _ raft.LogSnapshot,
+	_ context.Context, _ rac2.RaftMsgAppMode, _ rac2.RaftLogSnapshot,
) {
panic("HandleSchedulerEventRaftMuLocked is unimplemented")
}
diff --git a/pkg/kv/kvserver/kvflowcontrol/replica_rac2/raft_node.go b/pkg/kv/kvserver/kvflowcontrol/replica_rac2/raft_node.go
index a20de90911c9..381994706f70 100644
--- a/pkg/kv/kvserver/kvflowcontrol/replica_rac2/raft_node.go
+++ b/pkg/kv/kvserver/kvflowcontrol/replica_rac2/raft_node.go
@@ -73,3 +73,12 @@ func (rn raftNodeForRACv2) SendMsgAppRaftMuLocked(
defer rn.r.MuUnlock()
return rn.RawNode.SendMsgApp(raftpb.PeerID(replicaID), ls)
}
+
+// RaftLogSnapshot adapts raft.LogSnapshot to the rac2.RaftLogSnapshot interface.
+type RaftLogSnapshot raft.LogSnapshot
+
+var _ rac2.RaftLogSnapshot = RaftLogSnapshot{}
+
+// LogSlice implements rac2.RaftLogSnapshot.
+func (l RaftLogSnapshot) LogSlice(start, end uint64, maxSize uint64) (raft.LogSlice, error) {
+ return (raft.LogSnapshot(l)).LogSlice(start-1, end-1, maxSize)
+}
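The adapter above translates between two indexing conventions: rac2.RaftLogSnapshot takes a half-open [start, end) interval of entry indices, while the underlying raft.LogSnapshot.LogSlice accepts an open-closed interval, hence the -1 on both bounds. A small illustrative sketch (exampleLogSliceCall is hypothetical and not part of this patch):

package replica_rac2

import (
	"math"

	"github.com/cockroachdb/cockroach/pkg/raft"
)

// exampleLogSliceCall requests entries 5 through 9 via the rac2-facing
// half-open interval [5, 10); the adapter forwards this to the underlying
// raft.LogSnapshot as the open-closed interval (4, 9], returning the same
// five entries.
func exampleLogSliceCall(ls raft.LogSnapshot) (raft.LogSlice, error) {
	return RaftLogSnapshot(ls).LogSlice(5, 10, math.MaxUint64)
}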
diff --git a/pkg/kv/kvserver/kvserverpb/raft.proto b/pkg/kv/kvserver/kvserverpb/raft.proto
index 5dda2a59f4eb..2a8897e1012d 100644
--- a/pkg/kv/kvserver/kvserverpb/raft.proto
+++ b/pkg/kv/kvserver/kvserverpb/raft.proto
@@ -48,16 +48,6 @@ message RaftHeartbeat {
bool lagging_followers_on_quiesce_accurate = 10;
}
-// The traced entry from the leader along with the trace and span ID.
-message TracedEntry {
- uint64 index = 1 [(gogoproto.nullable) = false,
- (gogoproto.customtype) = "github.com/cockroachdb/cockroach/pkg/kv/kvpb.RaftIndex"];
- uint64 trace_id = 2 [(gogoproto.nullable) = false, (gogoproto.customname) = "TraceID",
- (gogoproto.customtype) = "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb.TraceID"];
- uint64 span_id = 3 [(gogoproto.nullable) = false, (gogoproto.customname) = "SpanID",
- (gogoproto.customtype) = "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb.SpanID"];
-}
-
// RaftMessageRequest is the request used to send raft messages using our
// protobuf-based RPC codec. If a RaftMessageRequest has a non-empty number of
// heartbeats or heartbeat_resps, the contents of the message field is treated
@@ -113,12 +103,6 @@ message RaftMessageRequest {
// indices. Used only with RACv2.
kv.kvserver.kvflowcontrol.kvflowcontrolpb.AdmittedState admitted_state = 14 [(gogoproto.nullable) = false];
- // TracedEntry is a mapping from Raft index to trace and span ids for this
- // request. They are set by the leaseholder and begin tracing on all
- // replicas. Currently, traces are not returned to the leaseholder, but
- // instead logged to a local log file.
- repeated TracedEntry traced_entries = 15 [(gogoproto.nullable) = false];
-
reserved 10;
}
diff --git a/pkg/kv/kvserver/raft.go b/pkg/kv/kvserver/raft.go
index 4f1e311004ca..553cf26e4098 100644
--- a/pkg/kv/kvserver/raft.go
+++ b/pkg/kv/kvserver/raft.go
@@ -267,7 +267,7 @@ func traceProposals(r *Replica, ids []kvserverbase.CmdIDKey, event string) {
r.mu.RLock()
for _, id := range ids {
if prop, ok := r.mu.proposals[id]; ok {
- ctxs = append(ctxs, prop.Context())
+ ctxs = append(ctxs, prop.ctx)
}
}
r.mu.RUnlock()
diff --git a/pkg/kv/kvserver/rafttrace/BUILD.bazel b/pkg/kv/kvserver/rafttrace/BUILD.bazel
deleted file mode 100644
index d4038ab33454..000000000000
--- a/pkg/kv/kvserver/rafttrace/BUILD.bazel
+++ /dev/null
@@ -1,38 +0,0 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
-
-go_library(
- name = "rafttrace",
- srcs = ["rafttrace.go"],
- importpath = "github.com/cockroachdb/cockroach/pkg/kv/kvserver/rafttrace",
- visibility = ["//visibility:public"],
- deps = [
- "//pkg/kv/kvpb",
- "//pkg/kv/kvserver/kvserverpb",
- "//pkg/raft",
- "//pkg/raft/raftpb",
- "//pkg/settings",
- "//pkg/settings/cluster",
- "//pkg/util/log",
- "//pkg/util/syncutil",
- "//pkg/util/tracing",
- "//pkg/util/tracing/tracingpb",
- "@com_github_cockroachdb_logtags//:logtags",
- "@com_github_cockroachdb_redact//:redact",
- ],
-)
-
-go_test(
- name = "rafttrace_test",
- srcs = ["rafttrace_test.go"],
- embed = [":rafttrace"],
- deps = [
- "//pkg/kv/kvpb",
- "//pkg/kv/kvserver/kvserverpb",
- "//pkg/raft/raftpb",
- "//pkg/settings/cluster",
- "//pkg/testutils",
- "//pkg/util/tracing",
- "//pkg/util/tracing/tracingpb",
- "@com_github_stretchr_testify//require",
- ],
-)
diff --git a/pkg/kv/kvserver/rafttrace/rafttrace.go b/pkg/kv/kvserver/rafttrace/rafttrace.go
deleted file mode 100644
index 2a6d945f8efb..000000000000
--- a/pkg/kv/kvserver/rafttrace/rafttrace.go
+++ /dev/null
@@ -1,477 +0,0 @@
-// Copyright 2024 The Cockroach Authors.
-//
-// Use of this software is governed by the CockroachDB Software License
-// included in the /LICENSE file.
-
-package rafttrace
-
-import (
- "context"
- "math"
- "sync/atomic"
-
- "github.com/cockroachdb/cockroach/pkg/kv/kvpb"
- "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
- "github.com/cockroachdb/cockroach/pkg/raft"
- "github.com/cockroachdb/cockroach/pkg/raft/raftpb"
- "github.com/cockroachdb/cockroach/pkg/settings"
- "github.com/cockroachdb/cockroach/pkg/settings/cluster"
- "github.com/cockroachdb/cockroach/pkg/util/log"
- "github.com/cockroachdb/cockroach/pkg/util/syncutil"
- "github.com/cockroachdb/cockroach/pkg/util/tracing"
- "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb"
- "github.com/cockroachdb/logtags"
- "github.com/cockroachdb/redact"
-)
-
-// MaxConcurrentRaftTraces is the maximum number of entries that can be traced
-// at any time on this store. Additional traces will be ignored until the number
-// of traces drops below the limit. Having too many active traces can negatively
-// impact performance as we iterate over all of them for some messages.
-//
-// TODO(baptist): Bump the default to a reasonable value like 10 that balances
-// usefulness with performance impact once we have validated the performance
-// impact.
-var MaxConcurrentRaftTraces = settings.RegisterIntSetting(
- settings.SystemOnly,
- "kv.raft.max_concurrent_traces",
- "the maximum number of tracked raft traces, 0 will disable tracing",
- 0,
- settings.IntInRange(0, 1000),
-)
-
-// traceValue represents the trace information for a single registration.
-type traceValue struct {
- traced kvserverpb.TracedEntry
- // ctx is a trace specific context used to log events on this trace.
- ctx context.Context
-
- mu struct {
- syncutil.Mutex
-
- // seenMsgAppResp tracks whether a MsgAppResp message has already been
- // logged by each replica peer. This limits the size of the log at a
- // small risk of missing some important messages in the case of dropped
- // messages or reproposals.
- seenMsgAppResp map[raftpb.PeerID]bool
-
- // seenMsgStorageAppendResp tracks whether a MsgStorageAppendResp
- // message has already been logged.
- seenMsgStorageAppendResp bool
-
- // propCtx is the underlying proposal context used for tracing to the
- // SQL trace.
- propCtx context.Context
-
- // propSpan is the span connected to the propCtx. It must be finished
- // when the trace is removed.
- propSpan *tracing.Span
- }
-}
-
-// logf logs the message to the trace context and the proposal context. The
-// proposal context is populated on the leaseholder and is attached to the SQL
-// trace.
-func (t *traceValue) logf(depth int, format string, args ...interface{}) {
- log.InfofDepth(t.ctx, depth+1, format, args...)
-
- t.mu.Lock()
- propCtx := t.mu.propCtx
- t.mu.Unlock()
- if propCtx != nil {
- log.VEventfDepth(propCtx, depth+1, 3, format, args...)
- }
-}
-
-// seenMsgAppResp returns true if an MsgAppResp has already been seen for this
-// peer, and otherwise marks it as seen and returns false.
-func (t *traceValue) seenMsgAppResp(p raftpb.PeerID) bool {
- t.mu.Lock()
- defer t.mu.Unlock()
- if t.mu.seenMsgAppResp[p] {
- return true
- }
- t.mu.seenMsgAppResp[p] = true
- return false
-}
-
-// seenMsgStorageAppendResp returns true if a MsgStorageAppendResp has already
-// been seen for this trace, and otherwise marks it as seen and returns false.
-func (t *traceValue) seenMsgStorageAppendResp() bool {
- t.mu.Lock()
- defer t.mu.Unlock()
- if t.mu.seenMsgStorageAppendResp {
- return true
- }
- t.mu.seenMsgStorageAppendResp = true
- return false
-}
-
-// String attempts to balance uniqueness with readability by only keeping the
-// lower 16 bits of the trace and span IDs.
-func (tv *traceValue) String() string {
- return redact.StringWithoutMarkers(tv)
-}
-
-func (tv *traceValue) SafeFormat(w redact.SafePrinter, _ rune) {
- w.Printf("i%d/%x.%x", tv.traced.Index, uint16(tv.traced.TraceID), uint16(tv.traced.SpanID))
-}
-
-// RaftTracer is a utility to trace the lifetime of raft log entries. It may log
-// some unrelated entries, since it does not consider entry or leader term. It
-// traces at most one MsgAppResp and one MsgStorageAppendResp per index, namely
-// the first one whose index is at or past the traced entry. This limitation
-// means it may not capture all the relevant messages, particularly if the term
-// changes.
-//
-// The library logs in two different ways: once to the standard cockroach log
-// and once to the SQL trace on the leaseholder.
-// TODO(baptist): Look at logging traces on followers and sending back to the
-// leader. It would need to be best effort, but might still be useful.
-// Alternatively, double-down on distributed trace collection if/when it's
-// supported. So that the trace does not need to be plumbed back to the
-// leaseholder / txn coordinator.
-type RaftTracer struct {
- // m is a map of all the currently traced entries for this replica. The
- // aggregate size of the map across all replicas is at most numRegisteredStore,
- // unless the setting changes, in which case we flush all entries on the next
- // register call. We add to numRegistered before we update m, and delete from
- // m before we remove from numRegistered, to keep this invariant.
- // TODO(baptist): Look at alternatives to using a map such as a sparse array
- // or circular buffer. Specifically, we might be able to save some memory
- // allocations. Note that the propCtx in the traceValue is already pulled
- // from a pool inside the tracer.
- m syncutil.Map[kvpb.RaftIndex, traceValue]
-
- // numRegisteredStore is the number of currently registered traces for this
- // store, not this replica. The number of registered traces will normally be
- // at most the MaxConcurrentRaftTraces setting. If the setting is lowered, we
- // flush all traces on all replicas.
- numRegisteredStore *atomic.Int64
-
- // numRegisteredReplica is the number of currently registered traces for
- // this replica. The sum(numRegisteredReplica) <= numRegisteredStore. We set
- // numRegisteredReplica to MaxInt32 when we close the tracer to prevent new
- // registrations.
- //
- // TODO(baptist/pav-kv): Look at optimizing to avoid the need for this to be
- // an atomic. It likely doesn't need to be atomic since the callers should
- // be holding Replica.raftMu and/or Replica.mu.
- numRegisteredReplica atomic.Int64
-
- // ctx is the ambient context for the replica and is used for remote
- // traces. It contains the replica/range information. On each trace we
- // additionally append the unique trace/span IDs.
- ctx context.Context
- st *cluster.Settings
-
- tracer *tracing.Tracer
-}
-
-// NewRaftTracer creates a new RaftTracer with the given ambient context for the
-// replica.
-func NewRaftTracer(
- ctx context.Context,
- tracer *tracing.Tracer,
- st *cluster.Settings,
- numRegisteredStore *atomic.Int64,
-) *RaftTracer {
- return &RaftTracer{ctx: ctx, tracer: tracer, st: st, numRegisteredStore: numRegisteredStore}
-}
-
-// reserveSpace checks whether we should register a new trace. If there are too
-// many registered traces it will not register and returns false. The soft
-// invariant is that numRegisteredStore <= numAllowed, which can be temporarily
-// violated if MaxConcurrentRaftTraces is lowered. This method returns true if
-// we can add one to the number registered for both the store and the replica,
-// otherwise it returns false. It is optimized for the `numAllowed == 0` case
-// and avoids loading `numRegisteredStore` until after this check.
-func (r *RaftTracer) reserveSpace() bool {
- numAllowed := MaxConcurrentRaftTraces.Get(&r.st.SV)
- numRegisteredReplica := r.numRegisteredReplica.Load()
-
- // This can only occur if the numAllowed setting has changed since a
- // previous call to reserveSpace. If this happens, flush all our current
- // traces and don't register this request. Note that when this happens we
- // also won't log this request.
- if numRegisteredReplica > numAllowed {
- log.Infof(r.ctx, "flushing all traces due to setting change")
- r.m.Range(func(index kvpb.RaftIndex, t *traceValue) bool {
- r.removeEntry(index)
- return true
- })
- return false
- }
-
- if numAllowed == 0 {
- return false
- }
-
- // If the maximum number of traces has been reached for the store, don't
- // register tracing and return false.
- numRegisteredStore := r.numRegisteredStore.Load()
- if numRegisteredStore >= numAllowed {
- return false
- }
-
- // Only increment the number of registered traces if numRegisteredStore
- // hasn't changed. In the case of an ABA update, it does not break the
- // invariant since some other trace was registered and deregistered, but
- // there is still a slot available. We will not register this trace if
- // someone else is concurrently registering a trace on this store, but this
- // is acceptable as it is a rare case.
- registerSucceeded := r.numRegisteredStore.CompareAndSwap(numRegisteredStore, numRegisteredStore+1)
- if registerSucceeded {
- // Add one unconditionally to the replica count.
- r.numRegisteredReplica.Add(1)
- }
- // Note we can't assert numRegisteredStore <= numAllowed because if the
- // setting is changed it can be temporarily violated on other replicas.
- return registerSucceeded
-}
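As a rough, self-contained sketch of the reservation pattern described above, using only the Go standard library (the names reserveSketch, storeCount, and limit are illustrative and not from this codebase):

package main

import (
	"fmt"
	"sync/atomic"
)

// reserveSketch admits a new trace only if the shared store-wide counter can
// be bumped without exceeding the limit. Losing the CompareAndSwap race simply
// means this particular trace is skipped, mirroring the best-effort behavior
// described above.
func reserveSketch(storeCount *atomic.Int64, limit int64) bool {
	if limit == 0 {
		return false
	}
	cur := storeCount.Load()
	if cur >= limit {
		return false
	}
	return storeCount.CompareAndSwap(cur, cur+1)
}

func main() {
	var n atomic.Int64
	fmt.Println(reserveSketch(&n, 1)) // true: slot taken
	fmt.Println(reserveSketch(&n, 1)) // false: limit reached
}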
-
-// tryStore attempts to store this value. If the index is already in the map
-// (a rare case), it does not store this entry and returns false. In that case
-// it also decrements the counters that were incremented by reserveSpace to
-// avoid double tracing.
-func (r *RaftTracer) tryStore(tv *traceValue) (*traceValue, bool) {
- if existingTv, loaded := r.m.LoadOrStore(tv.traced.Index, tv); loaded {
- tv.logf(2, "duplicate registration ignored - existing trace: %s", existingTv)
- existingTv.logf(2, "additional registration for same index: %s", tv)
- r.destroy(tv)
- return existingTv, false
- }
- return tv, true
-}
-
-// newTraceValue creates a new traceValue for the given traced entry. Note that
-// `propCtx` is intentionally not the first parameter: it is not the context
-// that should be used for logging, and it can be nil.
-func (r *RaftTracer) newTraceValue(
- te kvserverpb.TracedEntry, propCtx context.Context, propSpan *tracing.Span,
-) *traceValue {
- tv := &traceValue{traced: te}
- tv.ctx = logtags.AddTag(r.ctx, "id", redact.Safe(tv.String()))
- tv.mu.seenMsgAppResp = make(map[raftpb.PeerID]bool)
- tv.mu.propCtx = propCtx
- tv.mu.propSpan = propSpan
- return tv
-}
-
-// RegisterRemote registers a remote trace. This is called when we receive a
-// raft message over the wire with a request to continue tracing it.
-func (r *RaftTracer) RegisterRemote(te kvserverpb.TracedEntry) {
- if !r.reserveSpace() {
- return
- }
- // NB: We don't currently return remote traces; if we did, we would pass the
- // remote ctx here and trace it.
- if tv, created := r.tryStore(r.newTraceValue(te, nil, nil)); created {
- tv.logf(1, "registering remote trace %s", tv)
- }
-}
-
-// MaybeRegister is called on an entry that has been proposed to raft. This will
-// begin logging all subsequent updates to this entry. It returns true if the
-// registration is successful. A duplicate registration of the same index is
-// considered a success and returns true; however, the older registration is
-// kept and this registration is ignored.
-func (r *RaftTracer) MaybeRegister(ctx context.Context, ent raftpb.Entry) bool {
- // If the index is zero, we can't trace this entry. This can happen if
- // there is a leader/leaseholder split. We don't have an easy way to handle
- // this today, so don't attempt to trace it.
- if ent.Index == 0 {
- log.VEvent(ctx, 2, "skip registering raft proposal without index")
- return false
- }
-
- // Only register the entry if this is a traced context with verbose logging.
- span := tracing.SpanFromContext(ctx)
- if span == nil || span.RecordingType() != tracingpb.RecordingVerbose {
- return false
- }
-
- // This must be the last conditional. If reserveSpace returns true we must
- // call tryStore so that we do not leak a registered permit.
- if !r.reserveSpace() {
- log.VEvent(ctx, 2, "too many active raft traces, skipping")
- return false
- }
-
- ctx, span = r.tracer.StartSpanCtx(ctx, "raft trace",
- tracing.WithParent(span), tracing.WithFollowsFrom())
- if tv, created := r.tryStore(r.newTraceValue(kvserverpb.TracedEntry{
- Index: kvpb.RaftIndex(ent.Index),
- TraceID: span.TraceID(),
- SpanID: span.SpanID(),
- }, ctx, span)); created {
- tv.logf(1, "registering local trace %s", tv)
- }
- return true
-}
-
-// MaybeTrace logs the message in every trace it is relevant to.
-func (r *RaftTracer) MaybeTrace(m raftpb.Message) []kvserverpb.TracedEntry {
- // NB: This check is an optimization to handle the common case where there
- // are no registered traces on this replica.
- if r.numRegisteredReplica.Load() == 0 {
- return nil
- }
-
- switch m.Type {
- case raftpb.MsgProp, raftpb.MsgApp, raftpb.MsgStorageAppend, raftpb.MsgStorageApply:
- return r.traceIfCovered(m)
- case raftpb.MsgAppResp, raftpb.MsgStorageAppendResp, raftpb.MsgStorageApplyResp:
- r.traceIfPast(m)
- return nil
- }
- return nil
-}
-
-// removeEntry removes the trace at the given index and decrements the
-// registered counters at the replica and store level.
-func (r *RaftTracer) removeEntry(index kvpb.RaftIndex) {
- tv, found := r.m.LoadAndDelete(index)
- if !found {
- return
- }
- // Don't allow additional tracing to this context.
- r.destroy(tv)
-}
-
-func (r *RaftTracer) destroy(tv *traceValue) {
- r.numRegisteredReplica.Add(-1)
- r.numRegisteredStore.Add(-1)
-
- tv.mu.Lock()
- defer tv.mu.Unlock()
- if tv.mu.propSpan != nil {
- tv.mu.propSpan.Finish()
- tv.mu.propCtx = nil
- tv.mu.propSpan = nil
- }
-}
-
-// Close will unregister all the currently active traces and prevent additional
-// traces from being added. It is safe to call multiple times, but should always
-// be called at least once when the replica is destroyed to prevent leaking
-// traces.
-// Note that there could be a race between another caller calling Register and
-// us closing the tracer; however, we won't allow any new registrations to come
-// through after this call. Note that we set this to MaxInt32 instead of
-// MaxInt64 to avoid a rare race where another thread is in the middle of
-// `reserveSpace` and calls `Add(1)`, which would cause an overflow.
-func (r *RaftTracer) Close() {
- r.numRegisteredReplica.Store(math.MaxInt32)
-
- r.m.Range(func(index kvpb.RaftIndex, t *traceValue) bool {
- t.logf(2, "cleanup log index %d during Close", index)
- r.removeEntry(index)
- return true
- })
-}
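A small standalone demonstration of why MaxInt32 leaves safe headroom while MaxInt64 would not (standard library only; purely illustrative):

package main

import (
	"fmt"
	"math"
	"sync/atomic"
)

func main() {
	// With MaxInt32 as the "closed" sentinel, a racing Add(1) keeps the
	// counter hugely positive, so reserveSpace-style checks still fail.
	var closed atomic.Int64
	closed.Store(math.MaxInt32)
	closed.Add(1)
	fmt.Println(closed.Load() > 0) // true

	// Using MaxInt64 instead would wrap around to a negative value.
	var wrapped atomic.Int64
	wrapped.Store(math.MaxInt64)
	wrapped.Add(1)
	fmt.Println(wrapped.Load() == math.MinInt64) // true
}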
-
-func peer(p raftpb.PeerID) redact.SafeString {
- return redact.SafeString(raft.DescribeTarget(p))
-}
-
-// traceIfCovered will log the message if it touches any of the registered trace
-// points. Additionally it returns any saved trace/span IDs for sending to
-// remote nodes. This applies both to messages that the leader sends to
-// followers, and messages replicas send to their local storage.
-func (r *RaftTracer) traceIfCovered(m raftpb.Message) []kvserverpb.TracedEntry {
- if len(m.Entries) == 0 {
- return nil
- }
- minEntryIndex := kvpb.RaftIndex(m.Entries[0].Index)
- maxEntryIndex := kvpb.RaftIndex(m.Entries[len(m.Entries)-1].Index)
- var tracedEntries []kvserverpb.TracedEntry
- r.m.Range(func(index kvpb.RaftIndex, t *traceValue) bool {
- // If the traced index is not in the range of the entries, we can skip
- // it. We don't need to check each individual entry since they are
- // contiguous.
- if t.traced.Index < minEntryIndex || t.traced.Index > maxEntryIndex {
- return true
- }
- tracedEntries = append(tracedEntries, t.traced)
- // TODO(baptist): Not all the fields are relevant to log for all
- // message types. Consider cleaning up what is logged.
- t.logf(4,
- "%s->%s %v Term:%d Log:%d/%d Entries:[%d-%d]",
- peer(m.From),
- peer(m.To),
- m.Type,
- m.Term,
- m.LogTerm,
- m.Index,
- minEntryIndex,
- maxEntryIndex,
- )
- return true
- })
- return tracedEntries
-}
-
-// traceIfPast will log the message to all registered traceValues whose index
-// the message has passed. It will additionally unregister traces that are no
-// longer useful. This
-// call is for events that move the needle/watermark forward (e.g. the log
-// storage syncs), but don't have an exact range of entries affected. So, being
-// unable to match these events to entries exactly once, we instead check that
-// the watermark passed the entry. To protect against overly verbose logging, we
-// only allow MsgAppResp to be logged once per peer, and only one
-// MsgStorageAppendResp. When we receive a MsgStorageApplyResp we will log and
-// unregister the tracing.
-func (r *RaftTracer) traceIfPast(m raftpb.Message) {
- if m.Reject {
- return
- }
- r.m.Range(func(index kvpb.RaftIndex, t *traceValue) bool {
- switch m.Type {
- case raftpb.MsgAppResp:
- if kvpb.RaftIndex(m.Index) >= index && !t.seenMsgAppResp(m.From) {
- t.logf(4,
- "%s->%s %v Term:%d Index:%d",
- peer(m.From),
- peer(m.To),
- m.Type,
- m.Term,
- m.Index,
- )
- }
- case raftpb.MsgStorageAppendResp:
- if kvpb.RaftIndex(m.Index) >= index && !t.seenMsgStorageAppendResp() {
- t.logf(4,
- "%s->%s %v Log:%d/%d",
- peer(m.From),
- peer(m.To),
- m.Type,
- m.LogTerm,
- m.Index,
- )
- }
- case raftpb.MsgStorageApplyResp:
- if len(m.Entries) == 0 {
- return true
- }
- // Use the last entry to determine if we should log this message.
- msgIndex := m.Entries[len(m.Entries)-1].Index
- if kvpb.RaftIndex(msgIndex) >= index {
- t.logf(4,
- "%s->%s %v LastEntry:%d/%d",
- peer(m.From),
- peer(m.To),
- m.Type,
- m.Entries[len(m.Entries)-1].Term,
- m.Entries[len(m.Entries)-1].Index,
- )
- // We unregister the index here because we are now "done" with
- // this entry and don't expect more useful events.
- t.logf(4, "unregistered log index %d from tracing", index)
- r.removeEntry(index)
- }
- }
- return true
- })
-}
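For illustration, a minimal standalone reduction of the watermark idea used by traceIfPast (the helper and its names are hypothetical, not part of this patch):

package main

import "fmt"

// logIfPast reports a response at respIdx against a traced entry at tracedIdx:
// it logs only when the response's watermark is at or past the traced index,
// and at most once per peer to keep the output bounded.
func logIfPast(tracedIdx, respIdx, peer uint64, seen map[uint64]bool) {
	if respIdx < tracedIdx || seen[peer] {
		return
	}
	seen[peer] = true
	fmt.Printf("peer %d acked index %d (covers traced index %d)\n", peer, respIdx, tracedIdx)
}

func main() {
	seen := map[uint64]bool{}
	logIfPast(5, 7, 2, seen) // logged
	logIfPast(5, 9, 2, seen) // suppressed: peer 2 already logged
	logIfPast(5, 4, 3, seen) // suppressed: watermark not yet past index 5
}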
diff --git a/pkg/kv/kvserver/rafttrace/rafttrace_test.go b/pkg/kv/kvserver/rafttrace/rafttrace_test.go
deleted file mode 100644
index 59fdd5a9e1d3..000000000000
--- a/pkg/kv/kvserver/rafttrace/rafttrace_test.go
+++ /dev/null
@@ -1,344 +0,0 @@
-// Copyright 2024 The Cockroach Authors.
-//
-// Use of this software is governed by the CockroachDB Software License
-// included in the /LICENSE file.
-
-package rafttrace
-
-import (
- "context"
- "sync/atomic"
- "testing"
-
- "github.com/cockroachdb/cockroach/pkg/kv/kvpb"
- "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
- "github.com/cockroachdb/cockroach/pkg/raft/raftpb"
- "github.com/cockroachdb/cockroach/pkg/settings/cluster"
- "github.com/cockroachdb/cockroach/pkg/testutils"
- "github.com/cockroachdb/cockroach/pkg/util/tracing"
- "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb"
- "github.com/stretchr/testify/require"
-)
-
-func createTracer(count int64) *RaftTracer {
- ctx := context.Background()
- tracer := tracing.NewTracer()
- st := cluster.MakeTestingClusterSettings()
- MaxConcurrentRaftTraces.Override(ctx, &st.SV, count)
- numRegisteredStore := atomic.Int64{}
- return NewRaftTracer(ctx, tracer, st, &numRegisteredStore)
-}
-
-func TestRegisterRemote(t *testing.T) {
- rt := createTracer(10)
-
- te := kvserverpb.TracedEntry{Index: 1, TraceID: 123, SpanID: 456}
- rt.RegisterRemote(te)
- require.Equal(t, int64(1), rt.numRegisteredStore.Load())
- require.Equal(t, int64(1), rt.numRegisteredReplica.Load())
-}
-
-func TestMaybeRegisterNoSpan(t *testing.T) {
- rt := createTracer(10)
-
- // Test without a span in context
- ctx := context.Background()
- require.False(t, rt.MaybeRegister(ctx, raftpb.Entry{Index: 1}))
- require.Equal(t, int64(0), rt.numRegisteredStore.Load())
- require.Equal(t, int64(0), rt.numRegisteredReplica.Load())
-}
-
-func TestMaybeRegisterWithSpan(t *testing.T) {
- rt := createTracer(10)
-
- ctx := context.Background()
- // Test with a span in the context.
- ctx, span := rt.tracer.StartSpanCtx(ctx, "test-span", tracing.WithRecording(tracingpb.RecordingVerbose))
- defer span.Finish()
-
- require.True(t, rt.MaybeRegister(ctx, raftpb.Entry{Index: 1}))
- require.Equal(t, int64(1), rt.numRegisteredStore.Load())
- require.Equal(t, int64(1), rt.numRegisteredReplica.Load())
-}
-
-func TestMaybeTraceNoSpan(t *testing.T) {
- rt := createTracer(10)
- ctx := context.Background()
-
- ent := raftpb.Entry{Index: 1}
- require.False(t, rt.MaybeRegister(ctx, ent))
- require.Empty(t, rt.MaybeTrace(raftpb.Message{Type: raftpb.MsgApp, Entries: []raftpb.Entry{ent}}))
-}
-
-func TestMaybeTraceWithSpan(t *testing.T) {
- rt := createTracer(10)
- ctx, span := rt.tracer.StartSpanCtx(context.Background(), "test-span", tracing.WithRecording(tracingpb.RecordingVerbose))
- defer span.Finish()
-
- ent := raftpb.Entry{Index: 1}
- require.True(t, rt.MaybeRegister(ctx, ent))
- tracedEntries := rt.MaybeTrace(raftpb.Message{
- Type: raftpb.MsgApp,
- Entries: []raftpb.Entry{ent},
- })
- require.Len(t, tracedEntries, 1)
- require.Equal(t, kvpb.RaftIndex(1), tracedEntries[0].Index)
-}
-
-func TestClose(t *testing.T) {
- rt := createTracer(10)
- ctx, span := rt.tracer.StartSpanCtx(context.Background(), "test-span", tracing.WithRecording(tracingpb.RecordingVerbose))
- defer span.Finish()
-
- require.True(t, rt.MaybeRegister(ctx, raftpb.Entry{Index: 1}))
- require.Equal(t, int64(1), rt.numRegisteredStore.Load())
- require.Equal(t, int64(1), rt.numRegisteredReplica.Load())
-
- rt.Close()
- require.Equal(t, int64(0), rt.numRegisteredStore.Load())
- require.Greater(t, rt.numRegisteredReplica.Load(), int64(1000))
-}
-
-func TestTwoTracersSharingNumRegisteredStore(t *testing.T) {
- numRegisteredStore := atomic.Int64{}
- ctx := context.Background()
- tracer := tracing.NewTracer()
- st := cluster.MakeTestingClusterSettings()
- MaxConcurrentRaftTraces.Override(ctx, &st.SV, 3)
-
- rt1 := NewRaftTracer(ctx, tracer, st, &numRegisteredStore)
- rt2 := NewRaftTracer(ctx, tracer, st, &numRegisteredStore)
-
- // Register a trace in the first tracer.
- ctx1, span1 := rt1.tracer.StartSpanCtx(ctx, "test-span-1", tracing.WithRecording(tracingpb.RecordingVerbose))
- defer span1.Finish()
- require.True(t, rt1.MaybeRegister(ctx1, raftpb.Entry{Index: 1}))
- require.Equal(t, int64(1), rt1.numRegisteredStore.Load())
- require.Equal(t, int64(1), rt1.numRegisteredReplica.Load())
-
- // Register a trace in the second tracer.
- ctx2, span2 := rt2.tracer.StartSpanCtx(ctx, "test-span-2", tracing.WithRecording(tracingpb.RecordingVerbose))
- defer span2.Finish()
- require.True(t, rt2.MaybeRegister(ctx2, raftpb.Entry{Index: 2}))
- require.Equal(t, int64(2), rt2.numRegisteredStore.Load())
- require.Equal(t, int64(1), rt2.numRegisteredReplica.Load())
-
- // Ensure both tracers share the same numRegisteredStore.
- require.Equal(t, rt1.numRegisteredStore, rt2.numRegisteredStore)
-
- // Close the first tracer and check the counts.
- rt1.Close()
- require.Equal(t, int64(1), rt2.numRegisteredStore.Load())
- require.Greater(t, rt1.numRegisteredReplica.Load(), int64(1000))
- require.Equal(t, int64(1), rt2.numRegisteredReplica.Load())
-
- // Close the second tracer and check the counts.
- rt2.Close()
- require.Equal(t, int64(0), rt2.numRegisteredStore.Load())
- require.Greater(t, rt2.numRegisteredReplica.Load(), int64(1000))
-}
-
-func TestLimit(t *testing.T) {
- rt := createTracer(2)
- ctx1, span1 := rt.tracer.StartSpanCtx(context.Background(), "test-span", tracing.WithRecording(tracingpb.RecordingVerbose))
- defer span1.Finish()
- // Only 2 traces are allowed but we attempt to register 3.
- require.True(t, rt.MaybeRegister(ctx1, raftpb.Entry{Index: 1}))
- require.True(t, rt.MaybeRegister(ctx1, raftpb.Entry{Index: 2}))
- require.False(t, rt.MaybeRegister(ctx1, raftpb.Entry{Index: 3}))
- rt.Close()
- require.Equal(t, int64(0), rt.numRegisteredStore.Load())
- require.Greater(t, rt.numRegisteredReplica.Load(), int64(1000))
-}
-
-func TestMaybeTraceMsgAppResp(t *testing.T) {
- rt := createTracer(10)
- ctx, finish := tracing.ContextWithRecordingSpan(context.Background(), rt.tracer, "test")
-
- require.True(t, rt.MaybeRegister(ctx, raftpb.Entry{Index: 1}))
- require.Empty(t, rt.MaybeTrace(raftpb.Message{
- Term: 1,
- From: 1,
- To: 2,
- Type: raftpb.MsgAppResp,
- Index: uint64(5),
- }), 0)
- output := finish().String()
- require.NoError(t, testutils.MatchInOrder(output, []string{"1->2 MsgAppResp Term:1 Index:5"}...))
- require.Equal(t, int64(1), rt.numRegisteredStore.Load())
-}
-
-func TestDupeMsgAppResp(t *testing.T) {
- rt := createTracer(10)
- ctx, finish := tracing.ContextWithRecordingSpan(context.Background(), rt.tracer, "test")
-
- ent := raftpb.Entry{Index: 1}
- require.True(t, rt.MaybeRegister(ctx, ent))
- require.Empty(t, rt.MaybeTrace(raftpb.Message{
- Term: 1,
- From: 1,
- To: 2,
- Type: raftpb.MsgAppResp,
- Index: uint64(5),
- }))
- // The second message should not trace.
- require.Empty(t, rt.MaybeTrace(raftpb.Message{
- Term: 1,
- From: 1,
- To: 2,
- Type: raftpb.MsgAppResp,
- Index: uint64(6),
- }))
-
- output := finish().String()
- require.NoError(t, testutils.MatchInOrder(output, []string{"1->2 MsgAppResp Term:1 Index:5"}...))
- require.Error(t, testutils.MatchInOrder(output, []string{"1->2 MsgAppResp Term:1 Index:6"}...))
- require.Equal(t, int64(1), rt.numRegisteredStore.Load())
-}
-
-func TestTraceMsgStorageAppendResp(t *testing.T) {
- rt := createTracer(10)
- ctx, finish := tracing.ContextWithRecordingSpan(context.Background(), rt.tracer, "test")
-
- ent := raftpb.Entry{Index: 1}
- require.True(t, rt.MaybeRegister(ctx, ent))
- require.Empty(t, rt.MaybeTrace(raftpb.Message{
- From: 1,
- To: 2,
- Term: 3,
- Type: raftpb.MsgStorageAppendResp,
- Index: uint64(5),
- LogTerm: uint64(4),
- }))
-
- output := finish().String()
- require.NoError(t, testutils.MatchInOrder(output, []string{"1->2 MsgStorageAppendResp Log:4/5"}...))
- require.Equal(t, int64(1), rt.numRegisteredStore.Load())
-}
-
-func TestDupeMsgStorageAppendResp(t *testing.T) {
- rt := createTracer(10)
- ctx, finish := tracing.ContextWithRecordingSpan(context.Background(), rt.tracer, "test")
-
- ent := raftpb.Entry{Index: 1}
- require.True(t, rt.MaybeRegister(ctx, ent))
- require.Empty(t, rt.MaybeTrace(raftpb.Message{
- From: 1,
- To: 2,
- Term: 3,
- Type: raftpb.MsgStorageAppendResp,
- Index: uint64(5),
- LogTerm: uint64(4),
- }))
- // The second message should not trace.
- require.Empty(t, rt.MaybeTrace(raftpb.Message{
- From: 5,
- To: 6,
- Term: 7,
- Type: raftpb.MsgStorageAppendResp,
- Index: uint64(8),
- LogTerm: uint64(9),
- }))
-
- output := finish().String()
- require.NoError(t, testutils.MatchInOrder(output, []string{"1->2 MsgStorageAppendResp Log:4/5"}...))
- require.Error(t, testutils.MatchInOrder(output, []string{"5->6 MsgStorageAppendResp"}...))
- require.Equal(t, int64(1), rt.numRegisteredStore.Load())
-}
-
-func TestNoTraceMsgStorageAppendResp(t *testing.T) {
- rt := createTracer(10)
- ctx, finish := tracing.ContextWithRecordingSpan(context.Background(), rt.tracer, "test")
-
- ent := raftpb.Entry{Index: 10}
- require.True(t, rt.MaybeRegister(ctx, ent))
-
- // This doesn't trace since the index is behind the entry index.
- require.Empty(t, rt.MaybeTrace(raftpb.Message{
- From: 1,
- To: 2,
- Term: 3,
- Type: raftpb.MsgStorageAppendResp,
- Index: uint64(5),
- LogTerm: uint64(4),
- }))
-
- output := finish().String()
- require.Error(t, testutils.MatchInOrder(output, []string{"MsgStorageAppendResp"}...))
- require.Equal(t, int64(1), rt.numRegisteredStore.Load())
-}
-
-func TestTraceMsgStorageApplyResp(t *testing.T) {
- rt := createTracer(10)
- ctx, finish := tracing.ContextWithRecordingSpan(context.Background(), rt.tracer, "test")
-
- require.True(t, rt.MaybeRegister(ctx, raftpb.Entry{Index: 1}))
- require.Empty(t, rt.MaybeTrace(
- raftpb.Message{
- From: 1,
- To: 2,
- Type: raftpb.MsgStorageApplyResp,
- Entries: []raftpb.Entry{
- {Term: 1, Index: 1},
- {Term: 2, Index: 4},
- },
- }))
-
- output := finish().String()
- require.NoError(t, testutils.MatchInOrder(output,
- []string{
- `1->2 MsgStorageApplyResp LastEntry:2/4`,
- `unregistered log index`,
- }...))
- require.Equal(t, int64(0), rt.numRegisteredStore.Load())
-}
-
-func TestDuplicateIndex(t *testing.T) {
- rt := createTracer(10)
- ctx1, trace1 := tracing.ContextWithRecordingSpan(context.Background(), rt.tracer, "trace1")
- require.True(t, rt.MaybeRegister(ctx1, raftpb.Entry{Index: 1}))
- require.Equal(t, int64(1), rt.numRegisteredStore.Load())
- require.Equal(t, int64(1), rt.numRegisteredReplica.Load())
- // This returns true indicating that the index is registered, but it doesn't
- // add a new registration.
- ctx2, trace2 := tracing.ContextWithRecordingSpan(context.Background(), rt.tracer, "trace2")
- require.True(t, rt.MaybeRegister(ctx2, raftpb.Entry{Index: 1}))
- require.Equal(t, int64(1), rt.numRegisteredStore.Load())
- require.Equal(t, int64(1), rt.numRegisteredReplica.Load())
-
- // Unregister the entry with a MsgStorageApplyResp.
- require.Empty(t, rt.MaybeTrace(
- raftpb.Message{
- From: 1,
- To: 2,
- Type: raftpb.MsgStorageApplyResp,
- Entries: []raftpb.Entry{
- {Term: 1, Index: 1},
- {Term: 2, Index: 4},
- },
- }))
- // We expect the logs to go to the first trace.
- output1 := trace1().String()
- output2 := trace2().String()
- require.NoError(t, testutils.MatchInOrder(output1,
- []string{
- `1->2 MsgStorageApplyResp LastEntry:2/4`,
- `unregistered log index`,
- }...))
- require.NoError(t, testutils.MatchInOrder(output1,
- []string{
- `additional registration for same index`,
- }...))
- require.Error(t, testutils.MatchInOrder(output2,
- []string{
- `1->2 MsgStorageApplyResp LastEntry:2/4`,
- `unregistered log index`,
- }...))
- require.NoError(t, testutils.MatchInOrder(output2,
- []string{
- `duplicate registration ignored`,
- }...))
-
- require.Equal(t, int64(0), rt.numRegisteredStore.Load())
- require.Equal(t, int64(0), rt.numRegisteredReplica.Load())
-}
diff --git a/pkg/kv/kvserver/replica.go b/pkg/kv/kvserver/replica.go
index 0b2fbc22408d..1eb4a3369dfc 100644
--- a/pkg/kv/kvserver/replica.go
+++ b/pkg/kv/kvserver/replica.go
@@ -31,7 +31,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/load"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/logstore"
- "github.com/cockroachdb/cockroach/pkg/kv/kvserver/rafttrace"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/rangefeed"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/split"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/stateloader"
@@ -892,10 +891,6 @@ type Replica struct {
// MsgAppPull <=> LazyReplication.
// Updated with both raftMu and mu held.
currentRACv2Mode rac2.RaftMsgAppMode
-
- // raftTracer is used to trace raft messages that are sent with a
- // tracing context.
- raftTracer rafttrace.RaftTracer
}
// The raft log truncations that are pending. Access is protected by its own
diff --git a/pkg/kv/kvserver/replica_application_decoder.go b/pkg/kv/kvserver/replica_application_decoder.go
index 5b9cdf49cb5b..3586137f7c7b 100644
--- a/pkg/kv/kvserver/replica_application_decoder.go
+++ b/pkg/kv/kvserver/replica_application_decoder.go
@@ -145,7 +145,7 @@ func (d *replicaDecoder) createTracingSpans(ctx context.Context) {
propCtx := ctx // raft scheduler's ctx
var propSp *tracing.Span
// If the client has a trace, put a child into propCtx.
- if sp := tracing.SpanFromContext(cmd.proposal.Context()); sp != nil {
+ if sp := tracing.SpanFromContext(cmd.proposal.ctx); sp != nil {
propCtx, propSp = sp.Tracer().StartSpanCtx(
propCtx, "local proposal", tracing.WithParent(sp),
)
diff --git a/pkg/kv/kvserver/replica_application_result.go b/pkg/kv/kvserver/replica_application_result.go
index 998f7258d3a5..4f85db924d3c 100644
--- a/pkg/kv/kvserver/replica_application_result.go
+++ b/pkg/kv/kvserver/replica_application_result.go
@@ -328,6 +328,7 @@ func (r *Replica) makeReproposal(origP *ProposalData) (reproposal *ProposalData,
// span "follows from" the proposal's span, if the proposal sticks around
// for (some reincarnation of) the command to eventually apply, its trace
// will reflect the reproposal as well.
+ ctx: origP.ctx,
idKey: raftlog.MakeCmdIDKey(),
proposedAtTicks: 0, // set in registerProposalLocked
createdAtTicks: 0, // set in registerProposalLocked
@@ -363,8 +364,6 @@ func (r *Replica) makeReproposal(origP *ProposalData) (reproposal *ProposalData,
seedProposal: seedP,
}
- origCtx := origP.Context()
- newProposal.ctx.Store(&origCtx)
return newProposal, func() {
// If the original proposal had an explicit span, it's an async consensus
@@ -395,8 +394,7 @@ func (r *Replica) makeReproposal(origP *ProposalData) (reproposal *ProposalData,
//
// TODO(radu): Should this context be created via tracer.ForkSpan?
// We'd need to make sure the span is finished eventually.
- ctx := r.AnnotateCtx(context.TODO())
- origP.ctx.Store(&ctx)
+ origP.ctx = r.AnnotateCtx(context.TODO())
seedP.lastReproposal = newProposal
}
}
diff --git a/pkg/kv/kvserver/replica_application_result_test.go b/pkg/kv/kvserver/replica_application_result_test.go
index 51a83b9a50bd..c5f2dfc996b4 100644
--- a/pkg/kv/kvserver/replica_application_result_test.go
+++ b/pkg/kv/kvserver/replica_application_result_test.go
@@ -37,7 +37,8 @@ func makeProposalData() *ProposalData {
AdmissionOriginNode: 1,
}
- prop := ProposalData{
+ return &ProposalData{
+ ctx: context.WithValue(context.Background(), struct{}{}, "nonempty-ctx"),
sp: &tracing.Span{},
idKey: "deadbeef",
proposedAtTicks: 1,
@@ -57,9 +58,6 @@ func makeProposalData() *ProposalData {
seedProposal: nil,
lastReproposal: nil,
}
- ctx := context.WithValue(context.Background(), struct{}{}, "nonempty-ctx")
- prop.ctx.Store(&ctx)
- return &prop
}
func TestProposalDataAndRaftCommandAreConsideredWhenAddingFields(t *testing.T) {
@@ -75,8 +73,8 @@ func TestProposalDataAndRaftCommandAreConsideredWhenAddingFields(t *testing.T) {
// NB: we can't use zerofields for two reasons: First, we have unexported fields
// here, and second, we don't want to check for recursively populated structs (but
// only for the top level fields).
- require.Equal(t, 10, reflect.Indirect(reflect.ValueOf(prop.command)).NumField())
- require.Equal(t, 19, reflect.Indirect(reflect.ValueOf(prop)).NumField())
+ require.Equal(t, 10, reflect.TypeOf(*prop.command).NumField())
+ require.Equal(t, 19, reflect.TypeOf(*prop).NumField())
}
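A tiny standalone sketch of the field-count guard this test relies on (the example type is hypothetical):

package main

import (
	"fmt"
	"reflect"
)

type proposalLike struct {
	ctx   int // placeholder fields; unexported fields are counted as well
	idKey string
}

func main() {
	p := &proposalLike{}
	// Asserting on NumField forces whoever adds a field to revisit the test.
	fmt.Println(reflect.TypeOf(*p).NumField()) // 2
}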
func TestReplicaMakeReproposalChaininig(t *testing.T) {
@@ -86,7 +84,7 @@ func TestReplicaMakeReproposalChaininig(t *testing.T) {
var r Replica
proposals := make([]*ProposalData, 1, 4)
proposals[0] = makeProposalData()
- sharedCtx := proposals[0].Context()
+ sharedCtx := proposals[0].ctx
verify := func() {
seed := proposals[0]
@@ -104,9 +102,9 @@ func TestReplicaMakeReproposalChaininig(t *testing.T) {
}
// Only the latest reproposal must use the seed context.
for _, prop := range proposals[:len(proposals)-1] {
- require.NotEqual(t, sharedCtx, prop.Context())
+ require.NotEqual(t, sharedCtx, prop.ctx)
}
- require.Equal(t, sharedCtx, proposals[len(proposals)-1].Context())
+ require.Equal(t, sharedCtx, proposals[len(proposals)-1].ctx)
}
verify()
diff --git a/pkg/kv/kvserver/replica_destroy.go b/pkg/kv/kvserver/replica_destroy.go
index 3d730e240094..553b5e012fd7 100644
--- a/pkg/kv/kvserver/replica_destroy.go
+++ b/pkg/kv/kvserver/replica_destroy.go
@@ -181,5 +181,4 @@ func (r *Replica) disconnectReplicationRaftMuLocked(ctx context.Context) {
log.Fatalf(ctx, "removing raft group before destroying replica %s", r)
}
r.mu.internalRaftGroup = nil
- r.mu.raftTracer.Close()
}
diff --git a/pkg/kv/kvserver/replica_init.go b/pkg/kv/kvserver/replica_init.go
index 193f98f9e57c..c1691a2d903c 100644
--- a/pkg/kv/kvserver/replica_init.go
+++ b/pkg/kv/kvserver/replica_init.go
@@ -21,7 +21,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvstorage"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/load"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/logstore"
- "github.com/cockroachdb/cockroach/pkg/kv/kvserver/rafttrace"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/split"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/stateloader"
"github.com/cockroachdb/cockroach/pkg/raft"
@@ -155,7 +154,7 @@ func newUninitializedReplicaWithoutRaftGroup(
}
// Expose proposal data for external test packages.
return store.cfg.TestingKnobs.TestingProposalSubmitFilter(kvserverbase.ProposalFilterArgs{
- Ctx: p.Context(),
+ Ctx: p.ctx,
RangeID: rangeID,
StoreID: store.StoreID(),
ReplicaID: replicaID,
@@ -329,7 +328,6 @@ func (r *Replica) initRaftGroupRaftMuLockedReplicaMuLocked() error {
return err
}
r.mu.internalRaftGroup = rg
- r.mu.raftTracer = *rafttrace.NewRaftTracer(ctx, r.Tracer, r.ClusterSettings(), &r.store.concurrentRaftTraces)
r.flowControlV2.InitRaftLocked(
ctx, replica_rac2.NewRaftNode(rg, (*replicaForRACv2)(r)), rg.LogMark())
return nil
diff --git a/pkg/kv/kvserver/replica_proposal.go b/pkg/kv/kvserver/replica_proposal.go
index c0261d33b8eb..20881c02b945 100644
--- a/pkg/kv/kvserver/replica_proposal.go
+++ b/pkg/kv/kvserver/replica_proposal.go
@@ -9,7 +9,6 @@ import (
"context"
"os"
"path/filepath"
- "sync/atomic"
"time"
"github.com/cockroachdb/cockroach/pkg/keys"
@@ -117,15 +116,7 @@ type ProposalData struct {
// that during command application one should always use `replicatedCmd.ctx`
// for best coverage. `p.ctx` should be used when a `replicatedCmd` is not in
// scope, i.e. outside of raft command application.
- //
- // The context may be updated during the proposal lifecycle but will never
- // be nil. To clear out the context, set it to context.Background(). It is
- // protected by an atomic pointer because it can be read without holding the
- // raftMu. Use ProposalData.Context() to read it.
- //
- // TODO(baptist): Track down all the places where we read and write ctx and
- // determine whether we can convert this back to non-atomic field.
- ctx atomic.Pointer[context.Context]
+ ctx context.Context
// An optional tracing span bound to the proposal in the case of async
// consensus (it will be referenced by p.ctx). We need to finish this span
@@ -225,12 +216,6 @@ type ProposalData struct {
lastReproposal *ProposalData
}
-// Context returns the context associated with the proposal. The context may
-// change during the lifetime of the proposal.
-func (proposal *ProposalData) Context() context.Context {
- return *proposal.ctx.Load()
-}
-
// useReplicationAdmissionControl indicates whether this raft command should
// be subject to replication admission control.
func (proposal *ProposalData) useReplicationAdmissionControl() bool {
@@ -285,8 +270,7 @@ func (proposal *ProposalData) signalProposalResult(pr proposalResult) {
//
// NB: `proposal.ec.repl` might already have been cleared if we arrive here
// through finishApplication.
- ctx := context.Background()
- proposal.ctx.Store(&ctx)
+ proposal.ctx = context.Background()
}
}
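For context, a minimal sketch of the atomic-pointer pattern this change removes in favor of a plain field (standard library only; ctxHolder is an illustrative name):

package main

import (
	"context"
	"fmt"
	"sync/atomic"
)

// ctxHolder keeps a context behind an atomic pointer so it can be swapped and
// read without holding a lock, which is what the atomic ProposalData.ctx
// allowed before this change.
type ctxHolder struct {
	ctx atomic.Pointer[context.Context]
}

func (h *ctxHolder) set(ctx context.Context) { h.ctx.Store(&ctx) }
func (h *ctxHolder) get() context.Context    { return *h.ctx.Load() }

func main() {
	var h ctxHolder
	h.set(context.Background())
	fmt.Println(h.get() != nil) // true
}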
@@ -1066,13 +1050,13 @@ func (r *Replica) requestToProposal(
// Fill out the results even if pErr != nil; we'll return the error below.
proposal := &ProposalData{
+ ctx: ctx,
idKey: idKey,
doneCh: make(chan proposalResult, 1),
Local: &res.Local,
Request: ba,
leaseStatus: *st,
}
- proposal.ctx.Store(&ctx)
if needConsensus {
proposal.command = &kvserverpb.RaftCommand{
diff --git a/pkg/kv/kvserver/replica_proposal_buf.go b/pkg/kv/kvserver/replica_proposal_buf.go
index 5d96eceb0098..633568fa7b5e 100644
--- a/pkg/kv/kvserver/replica_proposal_buf.go
+++ b/pkg/kv/kvserver/replica_proposal_buf.go
@@ -126,7 +126,6 @@ type singleBatchProposer interface {
getReplicaID() roachpb.ReplicaID
flowControlHandle(ctx context.Context) kvflowcontrol.Handle
onErrProposalDropped([]raftpb.Entry, []*ProposalData, raftpb.StateType)
- registerForTracing(*ProposalData, raftpb.Entry) bool
}
// A proposer is an object that uses a propBuf to coordinate Raft proposals.
@@ -256,7 +255,7 @@ func (b *propBuf) Insert(ctx context.Context, p *ProposalData, tok TrackedReques
}
if log.V(4) {
- log.Infof(p.Context(), "submitting proposal %x", p.idKey)
+ log.Infof(p.ctx, "submitting proposal %x", p.idKey)
}
// Insert the proposal into the buffer's array. The buffer now takes ownership
@@ -572,7 +571,7 @@ func (b *propBuf) FlushLockedWithRaftGroup(
Data: p.encodedCommand,
})
nextProp++
- log.VEvent(p.Context(), 2, "flushing proposal to Raft")
+ log.VEvent(p.ctx, 2, "flushing proposal to Raft")
// We don't want deduct flow tokens for reproposed commands, and of
// course for proposals that didn't integrate with kvflowcontrol.
@@ -582,7 +581,7 @@ func (b *propBuf) FlushLockedWithRaftGroup(
} else {
admitHandles = append(admitHandles, admitEntHandle{
handle: p.raftAdmissionMeta,
- pCtx: p.Context(),
+ pCtx: p.ctx,
})
}
}
@@ -870,34 +869,26 @@ func proposeBatch(
// TODO(bdarnell): Handle ErrProposalDropped better.
// https://github.com/cockroachdb/cockroach/issues/21849
for _, p := range props {
- log.Event(p.Context(), "entry dropped")
+ if p.ctx != nil {
+ log.Event(p.ctx, "entry dropped")
+ }
}
p.onErrProposalDropped(ents, props, raftGroup.BasicStatus().RaftState)
return nil //nolint:returnerrcheck
}
- if err != nil {
- return err
- }
- // Now that we know what raft log position[1] this proposal is to end up
- // in, deduct flow tokens for it. This is done without blocking (we've
- // already waited for available flow tokens pre-evaluation). The tokens
- // will later be returned once we're informed of the entry being
- // admitted below raft.
- //
- // [1]: We're relying on an undocumented side effect of upstream raft
- // API where it populates the index and term for the passed in
- // slice of entries. See etcd-io/raft#57.
- maybeDeductFlowTokens(ctx, p.flowControlHandle(ctx), handles, ents)
-
- // Register the proposal with rafttrace. This will add the trace to the raft
- // lifecycle. We trace at most one entry per batch, so break after the first
- // one is successfully registered.
- for i := range ents {
- if p.registerForTracing(props[i], ents[i]) {
- break
- }
+ if err == nil {
+ // Now that we know what raft log position[1] this proposal is to end up
+ // in, deduct flow tokens for it. This is done without blocking (we've
+ // already waited for available flow tokens pre-evaluation). The tokens
+ // will later be returned once we're informed of the entry being
+ // admitted below raft.
+ //
+ // [1]: We're relying on an undocumented side effect of upstream raft
+ // API where it populates the index and term for the passed in
+ // slice of entries. See etcd-io/raft#57.
+ maybeDeductFlowTokens(ctx, p.flowControlHandle(ctx), handles, ents)
}
- return nil
+ return err
}
func maybeDeductFlowTokens(
@@ -1184,10 +1175,6 @@ func (rp *replicaProposer) closedTimestampTarget() hlc.Timestamp {
return (*Replica)(rp).closedTimestampTargetRLocked()
}
-func (rp *replicaProposer) registerForTracing(p *ProposalData, e raftpb.Entry) bool {
- return (*Replica)(rp).mu.raftTracer.MaybeRegister(p.Context(), e)
-}
-
func (rp *replicaProposer) withGroupLocked(fn func(raftGroup proposerRaft) error) error {
return (*Replica)(rp).withRaftGroupLocked(func(raftGroup *raft.RawNode) (bool, error) {
// We're proposing a command here so there is no need to wake the leader
diff --git a/pkg/kv/kvserver/replica_proposal_buf_test.go b/pkg/kv/kvserver/replica_proposal_buf_test.go
index 126febaaa575..bdb47cb3a7eb 100644
--- a/pkg/kv/kvserver/replica_proposal_buf_test.go
+++ b/pkg/kv/kvserver/replica_proposal_buf_test.go
@@ -217,8 +217,6 @@ func (t *testProposer) campaignLocked(ctx context.Context) {
}
}
-func (t *testProposer) registerForTracing(*ProposalData, raftpb.Entry) bool { return true }
-
func (t *testProposer) rejectProposalWithErrLocked(_ context.Context, _ *ProposalData, err error) {
if t.onRejectProposalWithErrLocked == nil {
panic(fmt.Sprintf("unexpected rejectProposalWithErrLocked call: err=%v", err))
@@ -303,6 +301,7 @@ func (pc proposalCreator) newProposal(ba *kvpb.BatchRequest) *ProposalData {
}
}
p := &ProposalData{
+ ctx: context.Background(),
idKey: kvserverbase.CmdIDKey("test-cmd"),
command: &kvserverpb.RaftCommand{
ReplicatedEvalResult: kvserverpb.ReplicatedEvalResult{
@@ -314,8 +313,6 @@ func (pc proposalCreator) newProposal(ba *kvpb.BatchRequest) *ProposalData {
Request: ba,
leaseStatus: pc.lease,
}
- ctx := context.Background()
- p.ctx.Store(&ctx)
p.encodedCommand = pc.encodeProposal(p)
return p
}
diff --git a/pkg/kv/kvserver/replica_raft.go b/pkg/kv/kvserver/replica_raft.go
index 505fdb840bda..7664c454797a 100644
--- a/pkg/kv/kvserver/replica_raft.go
+++ b/pkg/kv/kvserver/replica_raft.go
@@ -123,7 +123,7 @@ func (r *Replica) evalAndPropose(
idKey := raftlog.MakeCmdIDKey()
proposal, pErr := r.requestToProposal(ctx, idKey, ba, g, st, ui)
ba = proposal.Request // may have been updated
- log.Event(proposal.Context(), "evaluated request")
+ log.Event(proposal.ctx, "evaluated request")
// If the request hit a server-side concurrency retry error, immediately
// propagate the error. Don't assume ownership of the concurrency guard.
@@ -168,7 +168,7 @@ func (r *Replica) evalAndPropose(
// from this point on.
proposal.ec = makeReplicatedEndCmds(r, g, *st, timeutil.Now())
- log.VEventf(proposal.Context(), 2,
+ log.VEventf(proposal.ctx, 2,
"proposing command to write %d new keys, %d new values, %d new intents, "+
"write batch size=%d bytes",
proposal.command.ReplicatedEvalResult.Delta.KeyCount,
@@ -204,9 +204,7 @@ func (r *Replica) evalAndPropose(
// Fork the proposal's context span so that the proposal's context
// can outlive the original proposer's context.
- ctx, sp := tracing.ForkSpan(ctx, "async consensus")
- proposal.ctx.Store(&ctx)
- proposal.sp = sp
+ proposal.ctx, proposal.sp = tracing.ForkSpan(ctx, "async consensus")
if proposal.sp != nil {
// We can't leak this span if we fail to hand the proposal to the
// replication layer, so finish it later in this method if we are to
@@ -281,7 +279,7 @@ func (r *Replica) evalAndPropose(
"command is too large: %d bytes (max: %d)", quotaSize, maxSize,
))
}
- log.VEventf(proposal.Context(), 2, "acquiring proposal quota (%d bytes)", quotaSize)
+ log.VEventf(proposal.ctx, 2, "acquiring proposal quota (%d bytes)", quotaSize)
var err error
proposal.quotaAlloc, err = r.maybeAcquireProposalQuota(ctx, ba, quotaSize)
if err != nil {
@@ -351,8 +349,7 @@ func (r *Replica) evalAndPropose(
}
// TODO(radu): Should this context be created via tracer.ForkSpan?
// We'd need to make sure the span is finished eventually.
- ctx := r.AnnotateCtx(context.TODO())
- last.ctx.Store(&ctx)
+ last.ctx = r.AnnotateCtx(context.TODO())
}
return proposalCh, abandon, idKey, writeBytes, nil
}
@@ -399,12 +396,12 @@ func (r *Replica) propose(
log.Errorf(ctx, "%v", err)
return kvpb.NewError(err)
}
- log.KvDistribution.Infof(p.Context(), "proposing %s", crt)
+ log.KvDistribution.Infof(p.ctx, "proposing %s", crt)
} else if p.command.ReplicatedEvalResult.AddSSTable != nil {
- log.VEvent(p.Context(), 4, "sideloadable proposal detected")
+ log.VEvent(p.ctx, 4, "sideloadable proposal detected")
r.store.metrics.AddSSTableProposals.Inc(1)
} else if log.V(4) {
- log.Infof(p.Context(), "proposing command %x: %s", p.idKey, p.Request.Summary())
+ log.Infof(p.ctx, "proposing command %x: %s", p.idKey, p.Request.Summary())
}
raftAdmissionMeta := p.raftAdmissionMeta
@@ -433,7 +430,7 @@ func (r *Replica) propose(
// Too verbose even for verbose logging, so manually enable if you want to
// debug proposal sizes.
if false {
- log.Infof(p.Context(), `%s: proposal: %d
+ log.Infof(p.ctx, `%s: proposal: %d
RaftCommand.ReplicatedEvalResult: %d
RaftCommand.ReplicatedEvalResult.Delta: %d
RaftCommand.WriteBatch: %d
@@ -450,7 +447,7 @@ func (r *Replica) propose(
// TODO(tschottdorf): can we mark them so lightstep can group them?
const largeProposalEventThresholdBytes = 2 << 19 // 512kb
if ln := len(p.encodedCommand); ln > largeProposalEventThresholdBytes {
- log.Eventf(p.Context(), "proposal is large: %s", humanizeutil.IBytes(int64(ln)))
+ log.Eventf(p.ctx, "proposal is large: %s", humanizeutil.IBytes(int64(ln)))
}
// Insert into the proposal buffer, which passes the command to Raft to be
@@ -459,7 +456,7 @@ func (r *Replica) propose(
//
// NB: we must not hold r.mu while using the proposal buffer, see comment
// on the field.
- log.VEvent(p.Context(), 2, "submitting proposal to proposal buffer")
+ log.VEvent(p.ctx, 2, "submitting proposal to proposal buffer")
if err := r.mu.proposalBuf.Insert(ctx, p, tok.Move(ctx)); err != nil {
return kvpb.NewError(err)
}
@@ -638,11 +635,6 @@ func (r *Replica) stepRaftGroupRaftMuLocked(req *kvserverpb.RaftMessageRequest)
var sideChannelInfo replica_rac2.SideChannelInfoUsingRaftMessageRequest
var admittedVector rac2.AdmittedVector
err := r.withRaftGroup(func(raftGroup *raft.RawNode) (bool, error) {
- // If this message requested tracing, begin tracing it.
- for _, e := range req.TracedEntries {
- r.mu.raftTracer.RegisterRemote(e)
- }
- r.mu.raftTracer.MaybeTrace(req.Message)
// We're processing an incoming raft message (from a batch that may
// include MsgVotes), so don't campaign if we wake up our raft
// group.
@@ -1007,7 +999,8 @@ func (r *Replica) handleRaftReadyRaftMuLocked(
// Even if we don't have a Ready, or entries in Ready,
// replica_rac2.Processor may need to do some work.
raftEvent := rac2.RaftEventFromMsgStorageAppendAndMsgApps(
- rac2ModeForReady, r.ReplicaID(), msgStorageAppend, outboundMsgs, logSnapshot,
+ rac2ModeForReady, r.ReplicaID(), msgStorageAppend, outboundMsgs,
+ replica_rac2.RaftLogSnapshot(logSnapshot),
r.raftMu.msgAppScratchForFlowControl, replicaStateInfoMap)
r.flowControlV2.HandleRaftReadyRaftMuLocked(ctx, raftNodeBasicState, raftEvent)
if !hasReady {
@@ -1216,7 +1209,6 @@ func (r *Replica) handleRaftReadyRaftMuLocked(
}
}
- r.mu.raftTracer.MaybeTrace(msgStorageAppend)
if state, err = s.StoreEntries(ctx, state, app, cb, &stats.append); err != nil {
return stats, errors.Wrap(err, "while storing log entries")
}
@@ -1248,7 +1240,6 @@ func (r *Replica) handleRaftReadyRaftMuLocked(
stats.tApplicationBegin = timeutil.Now()
if hasMsg(msgStorageApply) {
- r.mu.raftTracer.MaybeTrace(msgStorageApply)
r.traceEntries(msgStorageApply.Entries, "committed, before applying any entries")
err := appTask.ApplyCommittedEntries(ctx)
@@ -1569,7 +1560,7 @@ func (r *Replica) processRACv2RangeController(ctx context.Context) {
}
}
r.flowControlV2.ProcessSchedulerEventRaftMuLocked(
- ctx, r.mu.currentRACv2Mode, logSnapshot)
+ ctx, r.mu.currentRACv2Mode, replica_rac2.RaftLogSnapshot(logSnapshot))
}
// SendMsgApp implements rac2.MsgAppSender.
@@ -1667,7 +1658,7 @@ func (r *Replica) refreshProposalsLocked(
// up here too.
if p.command.MaxLeaseIndex <= r.shMu.state.LeaseAppliedIndex {
r.cleanupFailedProposalLocked(p)
- log.Eventf(p.Context(), "retry proposal %x: %s", p.idKey, reason)
+ log.Eventf(p.ctx, "retry proposal %x: %s", p.idKey, reason)
p.finishApplication(ctx, makeProposalResultErr(
kvpb.NewAmbiguousResultErrorf(
"unable to determine whether command was applied via snapshot",
@@ -1735,7 +1726,7 @@ func (r *Replica) refreshProposalsLocked(
// definitely required, however.
sort.Sort(reproposals)
for _, p := range reproposals {
- log.Eventf(p.Context(), "re-submitting command %x (MLI %d, CT %s): %s",
+ log.Eventf(p.ctx, "re-submitting command %x (MLI %d, CT %s): %s",
p.idKey, p.command.MaxLeaseIndex, p.command.ClosedTimestamp, reason)
if err := r.mu.proposalBuf.ReinsertLocked(ctx, p); err != nil {
r.cleanupFailedProposalLocked(p)
@@ -1998,7 +1989,6 @@ func (r *Replica) deliverLocalRaftMsgsRaftMuLockedReplicaMuLocked(
}
for i, m := range localMsgs {
- r.mu.raftTracer.MaybeTrace(m)
if err := raftGroup.Step(m); err != nil {
log.Fatalf(ctx, "unexpected error stepping local raft message [%s]: %v",
raft.DescribeMessage(m, raftEntryFormatter), err)
@@ -2022,7 +2012,6 @@ func (r *Replica) sendRaftMessage(
lastToReplica, lastFromReplica := r.getLastReplicaDescriptors()
r.mu.RLock()
- traced := r.mu.raftTracer.MaybeTrace(msg)
fromReplica, fromErr := r.getReplicaDescriptorByIDRLocked(roachpb.ReplicaID(msg.From), lastToReplica)
toReplica, toErr := r.getReplicaDescriptorByIDRLocked(roachpb.ReplicaID(msg.To), lastFromReplica)
var startKey roachpb.RKey
@@ -2075,7 +2064,6 @@ func (r *Replica) sendRaftMessage(
RangeStartKey: startKey, // usually nil
UsingRac2Protocol: r.flowControlV2.GetEnabledWhenLeader() >= kvflowcontrol.V2EnabledWhenLeaderV1Encoding,
LowPriorityOverride: lowPriorityOverride,
- TracedEntries: traced,
}
// For RACv2, annotate successful MsgAppResp messages with the vector of
// admitted log indices, by priority.
diff --git a/pkg/kv/kvserver/replica_store_liveness.go b/pkg/kv/kvserver/replica_store_liveness.go
index 274e84285028..9ef9929cb1be 100644
--- a/pkg/kv/kvserver/replica_store_liveness.go
+++ b/pkg/kv/kvserver/replica_store_liveness.go
@@ -18,6 +18,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/settings"
"github.com/cockroachdb/cockroach/pkg/util/envutil"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
+ "github.com/cockroachdb/cockroach/pkg/util/log"
)
var raftLeaderFortificationFractionEnabled = settings.RegisterFloatSetting(
@@ -59,18 +60,12 @@ func (r *replicaRLockedStoreLiveness) getStoreIdent(
func (r *replicaRLockedStoreLiveness) SupportFor(replicaID raftpb.PeerID) (raftpb.Epoch, bool) {
storeID, ok := r.getStoreIdent(replicaID)
if !ok {
- return 0, false
- }
- // TODO(arul): we can remove this once we start to assign storeLiveness in the
- // Store constructor.
- if r.store.storeLiveness == nil {
+ ctx := r.AnnotateCtx(context.TODO())
+ log.Warningf(ctx, "store not found for replica %d in SupportFor", replicaID)
return 0, false
}
epoch, ok := r.store.storeLiveness.SupportFor(storeID)
- if !ok {
- return 0, false
- }
- return raftpb.Epoch(epoch), true
+ return raftpb.Epoch(epoch), ok
}
// SupportFrom implements the raftstoreliveness.StoreLiveness interface.
@@ -79,6 +74,8 @@ func (r *replicaRLockedStoreLiveness) SupportFrom(
) (raftpb.Epoch, hlc.Timestamp) {
storeID, ok := r.getStoreIdent(replicaID)
if !ok {
+ ctx := r.AnnotateCtx(context.TODO())
+ log.Warningf(ctx, "store not found for replica %d in SupportFrom", replicaID)
return 0, hlc.Timestamp{}
}
epoch, exp := r.store.storeLiveness.SupportFrom(storeID)
diff --git a/pkg/kv/kvserver/replica_test.go b/pkg/kv/kvserver/replica_test.go
index 1b307a9cab69..b233127bedd1 100644
--- a/pkg/kv/kvserver/replica_test.go
+++ b/pkg/kv/kvserver/replica_test.go
@@ -7772,7 +7772,7 @@ func TestReplicaAbandonProposal(t *testing.T) {
dropProp := int32(1)
tc.repl.mu.Lock()
tc.repl.mu.proposalBuf.testing.submitProposalFilter = func(p *ProposalData) (drop bool, _ error) {
- if v := p.Context().Value(magicKey{}); v != nil {
+ if v := p.ctx.Value(magicKey{}); v != nil {
cancel()
return atomic.LoadInt32(&dropProp) == 1, nil
}
@@ -7890,7 +7890,7 @@ func TestReplicaRetryRaftProposal(t *testing.T) {
tc.repl.mu.Lock()
tc.repl.mu.proposalBuf.testing.leaseIndexFilter = func(p *ProposalData) (indexOverride kvpb.LeaseAppliedIndex) {
- if v := p.Context().Value(magicKey{}); v != nil {
+ if v := p.ctx.Value(magicKey{}); v != nil {
if curAttempt := atomic.AddInt32(&c, 1); curAttempt == 1 {
return wrongLeaseIndex
}
@@ -7994,7 +7994,7 @@ func TestReplicaCancelRaftCommandProgress(t *testing.T) {
abandoned := make(map[kvserverbase.CmdIDKey]struct{}) // protected by repl.mu
tc.repl.mu.proposalBuf.testing.submitProposalFilter = func(p *ProposalData) (drop bool, _ error) {
if _, ok := abandoned[p.idKey]; ok {
- log.Infof(p.Context(), "abandoning command")
+ log.Infof(p.ctx, "abandoning command")
return true, nil
}
return false, nil
@@ -8066,7 +8066,7 @@ func TestReplicaBurstPendingCommandsAndRepropose(t *testing.T) {
if atomic.LoadInt32(&dropAll) == 1 {
return true, nil
}
- if v := p.Context().Value(magicKey{}); v != nil {
+ if v := p.ctx.Value(magicKey{}); v != nil {
seenCmds = append(seenCmds, int(p.command.MaxLeaseIndex))
}
return false, nil
@@ -8098,7 +8098,7 @@ func TestReplicaBurstPendingCommandsAndRepropose(t *testing.T) {
}
origIndexes := make([]int, 0, num)
for _, p := range tc.repl.mu.proposals {
- if v := p.Context().Value(magicKey{}); v != nil {
+ if v := p.ctx.Value(magicKey{}); v != nil {
origIndexes = append(origIndexes, int(p.command.MaxLeaseIndex))
}
}
diff --git a/pkg/kv/kvserver/store.go b/pkg/kv/kvserver/store.go
index 46867e63aee3..3ec55dbaf5bb 100644
--- a/pkg/kv/kvserver/store.go
+++ b/pkg/kv/kvserver/store.go
@@ -942,11 +942,6 @@ type Store struct {
// has likely improved).
draining atomic.Bool
- // concurrentRaftTraces is the number of concurrent raft trace requests that
- // are currently registered. This limit is used to prevent extensive raft
- // tracing from inadvertently impacting performance.
- concurrentRaftTraces atomic.Int64
-
// Locking notes: To avoid deadlocks, the following lock order must be
// obeyed: baseQueue.mu < Replica.raftMu < Replica.readOnlyCmdMu < Store.mu
// < Replica.mu < Replica.unreachablesMu < Store.coalescedMu < Store.scheduler.mu.
diff --git a/pkg/kv/kvserver/store_snapshot.go b/pkg/kv/kvserver/store_snapshot.go
index 4f83858857e3..4d0aee57e28e 100644
--- a/pkg/kv/kvserver/store_snapshot.go
+++ b/pkg/kv/kvserver/store_snapshot.go
@@ -735,13 +735,14 @@ func (kvSS *kvBatchSnapshotStrategy) Receive(
var prevWriteBytes int64
snapshotQ := s.cfg.KVAdmissionController.GetSnapshotQueue(s.StoreID())
+ if snapshotQ == nil {
+ log.Errorf(ctx, "unable to find snapshot queue for store: %s", s.StoreID())
+ }
// Using a nil pacer is effectively a noop if snapshot control is disabled.
var pacer *admission.SnapshotPacer = nil
- if admission.DiskBandwidthForSnapshotIngest.Get(&s.cfg.Settings.SV) {
- pacer = admission.NewSnapshotPacer(snapshotQ, s.StoreID())
+ if admission.DiskBandwidthForSnapshotIngest.Get(&s.cfg.Settings.SV) && snapshotQ != nil {
+ pacer = admission.NewSnapshotPacer(snapshotQ)
}
- // It is safe to call Close() on a nil pacer.
- defer pacer.Close()
for {
timingTag.start("recv")
diff --git a/pkg/kv/kvserver/testdata/replica_unavailable_error.txt b/pkg/kv/kvserver/testdata/replica_unavailable_error.txt
index aeb6077ecd85..430776562c70 100644
--- a/pkg/kv/kvserver/testdata/replica_unavailable_error.txt
+++ b/pkg/kv/kvserver/testdata/replica_unavailable_error.txt
@@ -1,3 +1,3 @@
echo
----
-replica unavailable: (n1,s10):1 unable to serve request to r10:‹{a-z}› [(n1,s10):1, (n2,s20):2, next=3, gen=0]: lost quorum (down: (n2,s20):2); closed timestamp: 1136214245.000000000,0 (2006-01-02 15:04:05); raft status: {"id":"0","term":0,"vote":"0","commit":0,"lead":"0","raftState":"StateFollower","applied":0,"progress":{},"leadtransferee":"0"}: probe failed
+replica unavailable: (n1,s10):1 unable to serve request to r10:‹{a-z}› [(n1,s10):1, (n2,s20):2, next=3, gen=0]: lost quorum (down: (n2,s20):2); closed timestamp: 1136214245.000000000,0 (2006-01-02 15:04:05); raft status: {"id":"0","term":0,"vote":"0","commit":0,"lead":"0","leadEpoch":"0","raftState":"StateFollower","applied":0,"progress":{},"leadtransferee":"0"}: probe failed
diff --git a/pkg/raft/BUILD.bazel b/pkg/raft/BUILD.bazel
index 86ce5ee107ae..f188e3af8430 100644
--- a/pkg/raft/BUILD.bazel
+++ b/pkg/raft/BUILD.bazel
@@ -27,6 +27,7 @@ go_library(
"//pkg/raft/tracker",
"//pkg/util/hlc",
"@com_github_cockroachdb_errors//:errors",
+ "@com_github_cockroachdb_redact//:redact",
"@org_golang_x_exp//maps",
],
)
diff --git a/pkg/raft/node_test.go b/pkg/raft/node_test.go
index f84e4f1650ef..6760383145bb 100644
--- a/pkg/raft/node_test.go
+++ b/pkg/raft/node_test.go
@@ -790,25 +790,25 @@ func TestNodeCommitPaginationAfterRestart(t *testing.T) {
}
s.hardState = persistedHardState
- entries := make([]raftpb.Entry, 10)
+ s.ents = make([]raftpb.Entry, 10)
var size uint64
- for i := range entries {
+ for i := range s.ents {
ent := raftpb.Entry{
Term: 1,
Index: uint64(i + 1),
Type: raftpb.EntryNormal,
Data: []byte("a"),
}
- entries[i] = ent
+
+ s.ents[i] = ent
size += uint64(ent.Size())
}
- s.ls = LogSlice{term: 1, entries: entries}
cfg := newTestConfig(1, 10, 1, s)
// Set a MaxSizePerMsg that would suggest to Raft that the last committed entry should
// not be included in the initial rd.CommittedEntries. However, our storage will ignore
// this and *will* return it (which is how the Commit index ended up being 10 initially).
- cfg.MaxSizePerMsg = size - uint64(entries[len(entries)-1].Size()) - 1
+ cfg.MaxSizePerMsg = size - uint64(s.ents[len(s.ents)-1].Size()) - 1
rn, err := NewRawNode(cfg)
require.NoError(t, err)
diff --git a/pkg/raft/quorum/joint.go b/pkg/raft/quorum/joint.go
index e806bff5dbcc..14f5c3c2ecb2 100644
--- a/pkg/raft/quorum/joint.go
+++ b/pkg/raft/quorum/joint.go
@@ -45,6 +45,20 @@ func (c JointConfig) IDs() map[pb.PeerID]struct{} {
return m
}
+// Visit calls the given function for each unique voter ID in the joint
+// configuration.
+func (c JointConfig) Visit(f func(pb.PeerID)) {
+ for id := range c[0] {
+ f(id)
+ }
+ for id := range c[1] {
+ if _, ok := c[0][id]; ok {
+ continue // skip duplicate
+ }
+ f(id)
+ }
+}
+
// Describe returns a (multi-line) representation of the commit indexes for the
// given lookuper.
func (c JointConfig) Describe(l AckedIndexer) string {
diff --git a/pkg/raft/quorum/quorum_test.go b/pkg/raft/quorum/quorum_test.go
index 7c0924b6720a..8da15666ba0e 100644
--- a/pkg/raft/quorum/quorum_test.go
+++ b/pkg/raft/quorum/quorum_test.go
@@ -6,6 +6,7 @@
package quorum
import (
+ "slices"
"testing"
pb "github.com/cockroachdb/cockroach/pkg/raft/raftpb"
@@ -134,3 +135,21 @@ func TestLeadSupportExpirationJointConfig(t *testing.T) {
require.Equal(t, tc.exp, j.LeadSupportExpiration(tc.support))
}
}
+
+func TestJointConfigVisit(t *testing.T) {
+ defer leaktest.AfterTest(t)()
+ defer log.Scope(t).Close(t)
+
+ j := JointConfig{
+ MajorityConfig{1: struct{}{}, 2: struct{}{}, 3: struct{}{}},
+ MajorityConfig{2: struct{}{}, 3: struct{}{}, 4: struct{}{}},
+ }
+
+ var visited []pb.PeerID
+ j.Visit(func(id pb.PeerID) {
+ visited = append(visited, id)
+ })
+ slices.Sort(visited)
+
+ require.Equal(t, []pb.PeerID{1, 2, 3, 4}, visited)
+}
diff --git a/pkg/raft/raft.go b/pkg/raft/raft.go
index 7c5245a6d5ce..8e7e3ce05d52 100644
--- a/pkg/raft/raft.go
+++ b/pkg/raft/raft.go
@@ -36,6 +36,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/raft/raftstoreliveness"
"github.com/cockroachdb/cockroach/pkg/raft/tracker"
"github.com/cockroachdb/errors"
+ "github.com/cockroachdb/redact"
"golang.org/x/exp/maps"
)
@@ -1418,12 +1419,20 @@ func (r *raft) Step(m pb.Message) error {
if m.Type == pb.MsgVote || m.Type == pb.MsgPreVote {
force := bytes.Equal(m.Context, []byte(campaignTransfer))
inHeartbeatLease := r.checkQuorum && r.lead != None && r.electionElapsed < r.electionTimeout
- // NB: A fortified leader is allowed to bump its term. It'll need to
- // re-fortify once if it gets elected at the higher term though, so the
- // leader must take care to not regress its supported expiration. However,
- // at the follower, we grant the fortified leader our vote at the higher
- // term.
- inFortifyLease := r.supportingFortifiedLeader() && r.lead != m.From
+ inFortifyLease := r.supportingFortifiedLeader() &&
+ // NB: A fortified leader is allowed to bump its term. It'll need to
+ // re-fortify once if it gets elected at the higher term though, so the
+ // leader must take care to not regress its supported expiration.
+ // However, at the follower, we grant the fortified leader our vote at
+ // the higher term.
+ r.lead != m.From &&
+ // NB: If the peer that's campaigning has an entry in its log with a
+ // higher term than what we're aware of, then this conclusively proves
+ // that a new leader was elected at a higher term. We never heard from
+ // this new leader (otherwise we'd have bumped r.Term in response).
+ // However, any fortification we're providing to a leader that has been
+ // since dethroned is pointless.
+ m.LogTerm <= r.Term
if !force && (inHeartbeatLease || inFortifyLease) {
// If a server receives a Request{,Pre}Vote message but is still
// supporting a fortified leader, it does not update its term or grant
@@ -1432,14 +1441,14 @@ func (r *raft) Step(m pb.Message) error {
// leader it does not update its term or grant its vote.
{
// Log why we're ignoring the Request{,Pre}Vote.
- var inHeartbeatLeaseMsg string
- var inFortifyLeaseMsg string
- var sep string
+ var inHeartbeatLeaseMsg redact.RedactableString
+ var inFortifyLeaseMsg redact.RedactableString
+ var sep redact.SafeString
if inHeartbeatLease {
- inHeartbeatLeaseMsg = fmt.Sprintf("recently received communication from leader (remaining ticks: %d)", r.electionTimeout-r.electionElapsed)
+ inHeartbeatLeaseMsg = redact.Sprintf("recently received communication from leader (remaining ticks: %d)", r.electionTimeout-r.electionElapsed)
}
if inFortifyLease {
- inFortifyLeaseMsg = fmt.Sprintf("supporting fortified leader %d at epoch %d", r.lead, r.leadEpoch)
+ inFortifyLeaseMsg = redact.Sprintf("supporting fortified leader %d at epoch %d", r.lead, r.leadEpoch)
}
if inFortifyLease && inHeartbeatLease {
sep = " and "
@@ -1551,11 +1560,12 @@ func (r *raft) Step(m pb.Message) error {
case pb.MsgVote, pb.MsgPreVote:
// We can vote if this is a repeat of a vote we've already cast...
canVote := r.Vote == m.From ||
- // ...we haven't voted and we don't think there's a leader yet in this term...
+ // ...OR we haven't voted and we don't think there's a leader yet in this
+ // term...
(r.Vote == None && r.lead == None) ||
- // ...or this is a PreVote for a future term...
+ // ...OR this is a PreVote for a future term...
(m.Type == pb.MsgPreVote && m.Term > r.Term)
- // ...and we believe the candidate is up to date.
+ // ...AND we believe the candidate is up to date.
lastID := r.raftLog.lastEntryID()
candLastID := entryID{term: m.LogTerm, index: m.Index}
if canVote && r.raftLog.isUpToDate(candLastID) {
diff --git a/pkg/raft/raft_test.go b/pkg/raft/raft_test.go
index ff1d6e732aff..ec0355acd803 100644
--- a/pkg/raft/raft_test.go
+++ b/pkg/raft/raft_test.go
@@ -863,10 +863,9 @@ func TestCandidateConcede(t *testing.T) {
assert.Equal(t, pb.StateFollower, a.state)
assert.Equal(t, uint64(1), a.Term)
- wantLog := ltoa(newLog(&MemoryStorage{ls: LogSlice{
- term: 1,
- entries: []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 1, Data: data}},
- }}, nil))
+ wantLog := ltoa(newLog(&MemoryStorage{
+ ents: []pb.Entry{{}, {Data: nil, Term: 1, Index: 1}, {Term: 1, Index: 2, Data: data}},
+ }, nil))
for i, p := range tt.peers {
if sm, ok := p.(*raft); ok {
l := ltoa(sm.raftLog)
@@ -904,12 +903,9 @@ func TestOldMessages(t *testing.T) {
// commit a new entry
tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}})
- ents := index(1).terms(1, 2, 3, 3)
- ents[3].Data = []byte("somedata")
- ilog := newLog(&MemoryStorage{ls: LogSlice{
- term: 3,
- entries: ents,
- }}, nil)
+ ents := index(0).terms(0, 1, 2, 3, 3)
+ ents[4].Data = []byte("somedata")
+ ilog := newLog(&MemoryStorage{ents: ents}, nil)
base := ltoa(ilog)
for i, p := range tt.peers {
if sm, ok := p.(*raft); ok {
@@ -958,10 +954,9 @@ func TestProposal(t *testing.T) {
wantLog := newLog(NewMemoryStorage(), raftlogger.RaftLogger)
if tt.success {
- wantLog = newLog(&MemoryStorage{ls: LogSlice{
- term: 2,
- entries: []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 1, Data: data}},
- }}, nil)
+ wantLog = newLog(&MemoryStorage{
+ ents: []pb.Entry{{}, {Data: nil, Term: 1, Index: 1}, {Term: 1, Index: 2, Data: data}},
+ }, nil)
}
base := ltoa(wantLog)
for i, p := range tt.peers {
@@ -990,10 +985,9 @@ func TestProposalByProxy(t *testing.T) {
// propose via follower
tt.send(pb.Message{From: 2, To: 2, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}})
- wantLog := newLog(&MemoryStorage{ls: LogSlice{
- term: 1,
- entries: []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 1, Data: data}},
- }}, nil)
+ wantLog := newLog(&MemoryStorage{
+ ents: []pb.Entry{{}, {Data: nil, Term: 1, Index: 1}, {Term: 1, Data: data, Index: 2}},
+ }, nil)
base := ltoa(wantLog)
for i, p := range tt.peers {
if sm, ok := p.(*raft); ok {
@@ -1376,10 +1370,7 @@ func testRecvMsgVote(t *testing.T, msgType pb.MessageType) {
sm.step = stepLeader
}
sm.Vote = tt.voteFor
- sm.raftLog = newLog(&MemoryStorage{ls: LogSlice{
- term: 2,
- entries: index(1).terms(2, 2),
- }}, nil)
+ sm.raftLog = newLog(&MemoryStorage{ents: index(0).terms(0, 2, 2)}, nil)
// raft.Term is greater than or equal to raft.raftLog.lastTerm. In this
// test we're only testing MsgVote responses when the campaigning node
@@ -2011,10 +2002,7 @@ func TestLeaderAppResp(t *testing.T) {
// sm term is 1 after it becomes the leader.
// thus the last log term must be 1 to be committed.
sm := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3)))
- sm.raftLog = newLog(&MemoryStorage{ls: LogSlice{
- term: 1,
- entries: index(1).terms(1, 1),
- }}, nil)
+ sm.raftLog = newLog(&MemoryStorage{ents: index(0).terms(0, 1, 1)}, nil)
sm.becomeCandidate()
sm.becomeLeader()
sm.readMessages()
@@ -2137,10 +2125,7 @@ func TestRecvMsgBeat(t *testing.T) {
for i, tt := range tests {
sm := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3)))
- sm.raftLog = newLog(&MemoryStorage{ls: LogSlice{
- term: 1,
- entries: index(1).terms(1, 1),
- }}, nil)
+ sm.raftLog = newLog(&MemoryStorage{ents: index(0).terms(0, 1, 1)}, nil)
sm.Term = 1
sm.state = tt.state
switch tt.state {
diff --git a/pkg/raft/raftpb/raft.go b/pkg/raft/raftpb/raft.go
index 2169809f9339..b2073df26232 100644
--- a/pkg/raft/raftpb/raft.go
+++ b/pkg/raft/raftpb/raft.go
@@ -24,12 +24,6 @@ type Epoch int64
// SafeValue implements the redact.SafeValue interface.
func (e Epoch) SafeValue() {}
-// The enums in raft are all safe for redaction.
-func (MessageType) SafeValue() {}
-func (EntryType) SafeValue() {}
-func (ConfChangeType) SafeValue() {}
-func (ConfChangeTransition) SafeValue() {}
-
// Priority specifies per-entry priorities, that are local to the interaction
// between a leader-replica pair, i.e., they are not an invariant of a
// particular entry in the raft log (the replica could be the leader itself or
diff --git a/pkg/raft/rawnode_test.go b/pkg/raft/rawnode_test.go
index 97ba8b7c6988..40dc2981b9fc 100644
--- a/pkg/raft/rawnode_test.go
+++ b/pkg/raft/rawnode_test.go
@@ -481,7 +481,7 @@ func TestRawNodeStart(t *testing.T) {
}
storage := NewMemoryStorage()
- storage.ls = LogSlice{term: 1, prev: entryID{index: 1, term: 1}}
+ storage.ents[0].Index = 1
// TODO(tbg): this is a first prototype of what bootstrapping could look
// like (without the annoying faux ConfChanges). We want to persist a
@@ -500,13 +500,16 @@ func TestRawNodeStart(t *testing.T) {
}
bootstrap := func(storage appenderStorage, cs pb.ConfState) error {
require.NotEmpty(t, cs.Voters, "no voters specified")
- fi, li := storage.FirstIndex(), storage.LastIndex()
+ fi := storage.FirstIndex()
require.GreaterOrEqual(t, fi, uint64(2), "FirstIndex >= 2 is prerequisite for bootstrap")
- require.Equal(t, fi, li+1, "the log must be empty")
- entries, err := storage.Entries(fi, li+1, math.MaxUint64)
- require.NoError(t, err)
- require.Empty(t, entries, "should not have been able to load any entries")
+ _, err := storage.Entries(fi, fi, math.MaxUint64)
+ // TODO(tbg): match exact error
+ require.Error(t, err, "should not have been able to load first index")
+
+ li := storage.LastIndex()
+ _, err = storage.Entries(li, li, math.MaxUint64)
+ require.Error(t, err, "should not have been able to load last index")
hs, ics, err := storage.InitialState()
require.NoError(t, err)
@@ -669,32 +672,34 @@ func TestRawNodeCommitPaginationAfterRestart(t *testing.T) {
s := &ignoreSizeHintMemStorage{
MemoryStorage: newTestMemoryStorage(withPeers(1)),
}
- s.hardState = pb.HardState{
+ persistedHardState := pb.HardState{
Term: 1,
Vote: 1,
Commit: 10,
}
- entries := make([]pb.Entry, 10)
+
+ s.hardState = persistedHardState
+ s.ents = make([]pb.Entry, 10)
var size uint64
- for i := range entries {
+ for i := range s.ents {
ent := pb.Entry{
Term: 1,
Index: uint64(i + 1),
Type: pb.EntryNormal,
Data: []byte("a"),
}
- entries[i] = ent
+
+ s.ents[i] = ent
size += uint64(ent.Size())
}
- s.ls = LogSlice{term: 1, entries: entries}
cfg := newTestConfig(1, 10, 1, s)
// Set a MaxSizePerMsg that would suggest to Raft that the last committed entry should
// not be included in the initial rd.CommittedEntries. However, our storage will ignore
// this and *will* return it (which is how the Commit index ended up being 10 initially).
- cfg.MaxSizePerMsg = size - uint64(entries[len(entries)-1].Size()) - 1
+ cfg.MaxSizePerMsg = size - uint64(s.ents[len(s.ents)-1].Size()) - 1
- s.ls.entries = append(s.ls.entries, pb.Entry{
+ s.ents = append(s.ents, pb.Entry{
Term: 1,
Index: uint64(11),
Type: pb.EntryNormal,
diff --git a/pkg/raft/status.go b/pkg/raft/status.go
index cb9ac6ad47af..4c5208eff0c0 100644
--- a/pkg/raft/status.go
+++ b/pkg/raft/status.go
@@ -162,8 +162,8 @@ func getLeadSupportStatus(r *raft) LeadSupportStatus {
// MarshalJSON translates the raft status into JSON.
func (s Status) MarshalJSON() ([]byte, error) {
- j := fmt.Sprintf(`{"id":"%x","term":%d,"vote":"%x","commit":%d,"lead":"%x","raftState":%q,"applied":%d,"progress":{`,
- s.ID, s.Term, s.Vote, s.Commit, s.Lead, s.RaftState, s.Applied)
+ j := fmt.Sprintf(`{"id":"%x","term":%d,"vote":"%x","commit":%d,"lead":"%x","leadEpoch":"%d","raftState":%q,"applied":%d,"progress":{`,
+ s.ID, s.Term, s.Vote, s.Commit, s.Lead, s.LeadEpoch, s.RaftState, s.Applied)
if len(s.Progress) == 0 {
j += "},"
diff --git a/pkg/raft/storage.go b/pkg/raft/storage.go
index 71823fe52de8..608743b1e491 100644
--- a/pkg/raft/storage.go
+++ b/pkg/raft/storage.go
@@ -132,9 +132,8 @@ type inMemStorageCallStats struct {
initialState, firstIndex, lastIndex, entries, term, snapshot int
}
-// MemoryStorage implements the Storage interface backed by an in-memory slice.
-//
-// TODO(pav-kv): split into LogStorage and StateStorage.
+// MemoryStorage implements the Storage interface backed by an
+// in-memory array.
type MemoryStorage struct {
// Protects access to all fields. Most methods of MemoryStorage are
// run on the raft goroutine, but Append() is run on an application
@@ -143,21 +142,18 @@ type MemoryStorage struct {
hardState pb.HardState
snapshot pb.Snapshot
-
- // ls contains the log entries.
- //
- // TODO(pav-kv): the term field of the LogSlice is conservatively populated
- // to be the last entry term, to keep the LogSlice valid. But it must be
- // sourced from the upper layer's last accepted term (which is >= the last
- // entry term).
- ls LogSlice
+ // ents[i] has raft log position i+snapshot.Metadata.Index
+ ents []pb.Entry
callStats inMemStorageCallStats
}
// NewMemoryStorage creates an empty MemoryStorage.
func NewMemoryStorage() *MemoryStorage {
- return &MemoryStorage{}
+ return &MemoryStorage{
+ // When starting from scratch populate the list with a dummy entry at term zero.
+ ents: make([]pb.Entry, 1),
+ }
}
// InitialState implements the Storage interface.
@@ -179,17 +175,22 @@ func (ms *MemoryStorage) Entries(lo, hi, maxSize uint64) ([]pb.Entry, error) {
ms.Lock()
defer ms.Unlock()
ms.callStats.entries++
-
- if lo <= ms.ls.prev.index {
+ offset := ms.ents[0].Index
+ if lo <= offset {
return nil, ErrCompacted
- } else if last := ms.ls.lastIndex(); hi > last+1 {
- raftlogger.GetLogger().Panicf("entries' hi(%d) is out of bound lastindex(%d)", hi, last)
+ }
+ if hi > ms.lastIndex()+1 {
+ raftlogger.GetLogger().Panicf("entries' hi(%d) is out of bound lastindex(%d)", hi, ms.lastIndex())
+ }
+ // only contains dummy entries.
+ if len(ms.ents) == 1 {
+ return nil, ErrUnavailable
}
- ents := limitSize(ms.ls.sub(lo-1, hi-1), entryEncodingSize(maxSize))
+ ents := limitSize(ms.ents[lo-offset:hi-offset], entryEncodingSize(maxSize))
// NB: use the full slice expression to limit what the caller can do with the
// returned slice. For example, an append will reallocate and copy this slice
- // instead of corrupting the neighbouring entries.
+ // instead of corrupting the neighbouring ms.ents.
return ents[:len(ents):len(ents)], nil
}
@@ -198,12 +199,14 @@ func (ms *MemoryStorage) Term(i uint64) (uint64, error) {
ms.Lock()
defer ms.Unlock()
ms.callStats.term++
- if i < ms.ls.prev.index {
+ offset := ms.ents[0].Index
+ if i < offset {
return 0, ErrCompacted
- } else if i > ms.ls.lastIndex() {
+ }
+ if int(i-offset) >= len(ms.ents) {
return 0, ErrUnavailable
}
- return ms.ls.termAt(i), nil
+ return ms.ents[i-offset].Term, nil
}
// LastIndex implements the Storage interface.
@@ -211,7 +214,11 @@ func (ms *MemoryStorage) LastIndex() uint64 {
ms.Lock()
defer ms.Unlock()
ms.callStats.lastIndex++
- return ms.ls.lastIndex()
+ return ms.lastIndex()
+}
+
+func (ms *MemoryStorage) lastIndex() uint64 {
+ return ms.ents[0].Index + uint64(len(ms.ents)) - 1
}
// FirstIndex implements the Storage interface.
@@ -219,19 +226,17 @@ func (ms *MemoryStorage) FirstIndex() uint64 {
ms.Lock()
defer ms.Unlock()
ms.callStats.firstIndex++
- return ms.ls.prev.index + 1
+ return ms.firstIndex()
+}
+
+func (ms *MemoryStorage) firstIndex() uint64 {
+ return ms.ents[0].Index + 1
}
// LogSnapshot implements the LogStorage interface.
func (ms *MemoryStorage) LogSnapshot() LogStorageSnapshot {
- // Copy the log slice, and protect MemoryStorage from potential appends to it.
- // Both MemoryStorage and the caller can append to the slice, but the full
- // slice expression makes sure the two don't corrupt each other's slices.
- ls := ms.ls
- ls.entries = ls.entries[:len(ls.entries):len(ls.entries)]
- // TODO(pav-kv): we don't need all other fields in MemoryStorage. Factor out a
- // LogStorage sub-type, and return just the log slice with it.
- return &MemoryStorage{ls: ls}
+ // TODO(pav-kv): return an immutable subset of MemoryStorage.
+ return ms
}
// Snapshot implements the Storage interface.
@@ -247,19 +252,16 @@ func (ms *MemoryStorage) Snapshot() (pb.Snapshot, error) {
func (ms *MemoryStorage) ApplySnapshot(snap pb.Snapshot) error {
ms.Lock()
defer ms.Unlock()
- id := entryID{index: snap.Metadata.Index, term: snap.Metadata.Term}
- // Check whether the snapshot is outdated.
- if id.index <= ms.snapshot.Metadata.Index {
+
+ // Check whether the snapshot being applied is outdated.
+ msIndex := ms.snapshot.Metadata.Index
+ snapIndex := snap.Metadata.Index
+ if msIndex >= snapIndex {
return ErrSnapOutOfDate
}
- // The new snapshot represents committed state, so its last entry should be
- // consistent with the previously committed one.
- if oldTerm := ms.snapshot.Metadata.Term; id.term < oldTerm {
- raftlogger.GetLogger().Panicf("snapshot at %+v regresses the term %d", id, oldTerm)
- }
+
ms.snapshot = snap
- // TODO(pav-kv): the term must be the last accepted term passed in.
- ms.ls = LogSlice{term: id.term, prev: id}
+ ms.ents = []pb.Entry{{Term: snap.Metadata.Term, Index: snap.Metadata.Index}}
return nil
}
@@ -274,12 +276,15 @@ func (ms *MemoryStorage) CreateSnapshot(
defer ms.Unlock()
if i <= ms.snapshot.Metadata.Index {
return pb.Snapshot{}, ErrSnapOutOfDate
- } else if last := ms.ls.lastIndex(); i > last {
- raftlogger.GetLogger().Panicf("snapshot %d is out of bound lastindex(%d)", i, last)
+ }
+
+ offset := ms.ents[0].Index
+ if i > ms.lastIndex() {
+ raftlogger.GetLogger().Panicf("snapshot %d is out of bound lastindex(%d)", i, ms.lastIndex())
}
ms.snapshot.Metadata.Index = i
- ms.snapshot.Metadata.Term = ms.ls.termAt(i)
+ ms.snapshot.Metadata.Term = ms.ents[i-offset].Term
if cs != nil {
ms.snapshot.Metadata.ConfState = *cs
}
@@ -287,60 +292,66 @@ func (ms *MemoryStorage) CreateSnapshot(
return ms.snapshot, nil
}
-// Compact discards all log entries <= index.
+// Compact discards all log entries prior to compactIndex.
// It is the application's responsibility to not attempt to compact an index
// greater than raftLog.applied.
-func (ms *MemoryStorage) Compact(index uint64) error {
+func (ms *MemoryStorage) Compact(compactIndex uint64) error {
ms.Lock()
defer ms.Unlock()
- if index <= ms.ls.prev.index {
+ offset := ms.ents[0].Index
+ if compactIndex <= offset {
return ErrCompacted
- } else if last := ms.ls.lastIndex(); index > last {
- raftlogger.GetLogger().Panicf("compact %d is out of bound lastindex(%d)", index, last)
}
- ms.ls = ms.ls.forward(index)
+ if compactIndex > ms.lastIndex() {
+ raftlogger.GetLogger().Panicf("compact %d is out of bound lastindex(%d)", compactIndex, ms.lastIndex())
+ }
+
+ i := compactIndex - offset
+ // NB: allocate a new slice instead of reusing the old ms.ents. Entries in
+ // ms.ents are immutable, and can be referenced from outside MemoryStorage
+ // through slices returned by ms.Entries().
+ ents := make([]pb.Entry, 1, uint64(len(ms.ents))-i)
+ ents[0].Index = ms.ents[i].Index
+ ents[0].Term = ms.ents[i].Term
+ ents = append(ents, ms.ents[i+1:]...)
+ ms.ents = ents
return nil
}
// Append the new entries to storage.
-//
-// TODO(pav-kv): pass in a LogSlice which carries correctness semantics.
+// TODO (xiangli): ensure the entries are continuous and
+// entries[0].Index > ms.ents[0].Index
func (ms *MemoryStorage) Append(entries []pb.Entry) error {
if len(entries) == 0 {
return nil
}
+
ms.Lock()
defer ms.Unlock()
- first := entries[0].Index
- if first <= ms.ls.prev.index {
- // Can not append at indices <= the compacted index.
- return ErrCompacted
- } else if last := ms.ls.lastIndex(); first > last+1 {
- raftlogger.GetLogger().Panicf("missing log entry [last: %d, append at: %d]", last, first)
- }
-
- // TODO(pav-kv): this must have the correct last accepted term. Pass in the
- // logSlice to this append method to update it correctly.
- ms.ls.term = entries[len(entries)-1].Term
+ first := ms.firstIndex()
+ last := entries[0].Index + uint64(len(entries)) - 1
- if first == ms.ls.lastIndex()+1 { // appending at the end of the log
- ms.ls.entries = append(ms.ls.entries, entries...)
- } else { // first <= lastIndex, after checks above
- prefix := ms.ls.sub(ms.ls.prev.index, first-1)
- // NB: protect the suffix of the old slice from rewrites.
- ms.ls.entries = append(prefix[:len(prefix):len(prefix)], entries...)
+ // shortcut if there is no new entry.
+ if last < first {
+ return nil
+ }
+ // truncate compacted entries
+ if first > entries[0].Index {
+ entries = entries[first-entries[0].Index:]
}
- return nil
-}
-// MakeLogSnapshot converts the MemoryStorage to a LogSnapshot type serving the
-// log from the MemoryStorage snapshot. Only for testing.
-func MakeLogSnapshot(ms *MemoryStorage) LogSnapshot {
- return LogSnapshot{
- first: ms.FirstIndex(),
- storage: ms.LogSnapshot(),
- unstable: ms.ls.forward(ms.ls.lastIndex()),
- logger: raftlogger.DiscardLogger,
+ offset := entries[0].Index - ms.ents[0].Index
+ switch {
+ case uint64(len(ms.ents)) > offset:
+ // NB: full slice expression protects ms.ents at index >= offset from
+ // rewrites, as they may still be referenced from outside MemoryStorage.
+ ms.ents = append(ms.ents[:offset:offset], entries...)
+ case uint64(len(ms.ents)) == offset:
+ ms.ents = append(ms.ents, entries...)
+ default:
+ raftlogger.GetLogger().Panicf("missing log entry [last: %d, append at: %d]",
+ ms.lastIndex(), entries[0].Index)
}
+ return nil
}
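For reference, a minimal standalone sketch of the index arithmetic the rewritten MemoryStorage relies on, using a stand-in entry type instead of raftpb.Entry: ents[0] is a dummy entry at the compaction/snapshot point, so raft index i lives at slice position i-ents[0].Index:

    package main

    import "fmt"

    // entry is a stand-in for pb.Entry with only the fields needed here.
    type entry struct{ Index, Term uint64 }

    // firstIndex/lastIndex/termAt mirror the helpers above.
    func firstIndex(ents []entry) uint64 { return ents[0].Index + 1 }
    func lastIndex(ents []entry) uint64  { return ents[0].Index + uint64(len(ents)) - 1 }
    func termAt(ents []entry, i uint64) uint64 {
        return ents[i-ents[0].Index].Term
    }

    func main() {
        // Dummy entry at index 3 (e.g. after compacting through 3), then entries 4 and 5.
        ents := []entry{{Index: 3, Term: 3}, {Index: 4, Term: 4}, {Index: 5, Term: 5}}
        fmt.Println(firstIndex(ents), lastIndex(ents), termAt(ents, 5)) // 4 5 5
    }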
diff --git a/pkg/raft/storage_test.go b/pkg/raft/storage_test.go
index 9af5b91a6546..f5a3f7d4d0e7 100644
--- a/pkg/raft/storage_test.go
+++ b/pkg/raft/storage_test.go
@@ -26,8 +26,7 @@ import (
)
func TestStorageTerm(t *testing.T) {
- prev3 := entryID{index: 3, term: 3}
- ls := prev3.append(4, 5)
+ ents := index(3).terms(3, 4, 5)
tests := []struct {
i uint64
@@ -44,7 +43,8 @@ func TestStorageTerm(t *testing.T) {
for _, tt := range tests {
t.Run("", func(t *testing.T) {
- s := &MemoryStorage{ls: ls}
+ s := &MemoryStorage{ents: ents}
+
if tt.wpanic {
require.Panics(t, func() {
_, _ = s.Term(tt.i)
@@ -58,9 +58,7 @@ func TestStorageTerm(t *testing.T) {
}
func TestStorageEntries(t *testing.T) {
- prev3 := entryID{index: 3, term: 3}
- ls := prev3.append(4, 5, 6)
- ents := ls.entries
+ ents := index(3).terms(3, 4, 5, 6)
tests := []struct {
lo, hi, maxsize uint64
@@ -75,17 +73,17 @@ func TestStorageEntries(t *testing.T) {
// even if maxsize is zero, the first entry should be returned
{4, 7, 0, nil, index(4).terms(4)},
// limit to 2
- {4, 7, uint64(ents[0].Size() + ents[1].Size()), nil, index(4).terms(4, 5)},
+ {4, 7, uint64(ents[1].Size() + ents[2].Size()), nil, index(4).terms(4, 5)},
// limit to 2
- {4, 7, uint64(ents[0].Size() + ents[1].Size() + ents[2].Size()/2), nil, index(4).terms(4, 5)},
- {4, 7, uint64(ents[0].Size() + ents[1].Size() + ents[2].Size() - 1), nil, index(4).terms(4, 5)},
+ {4, 7, uint64(ents[1].Size() + ents[2].Size() + ents[3].Size()/2), nil, index(4).terms(4, 5)},
+ {4, 7, uint64(ents[1].Size() + ents[2].Size() + ents[3].Size() - 1), nil, index(4).terms(4, 5)},
// all
- {4, 7, uint64(ents[0].Size() + ents[1].Size() + ents[2].Size()), nil, index(4).terms(4, 5, 6)},
+ {4, 7, uint64(ents[1].Size() + ents[2].Size() + ents[3].Size()), nil, index(4).terms(4, 5, 6)},
}
for _, tt := range tests {
t.Run("", func(t *testing.T) {
- s := &MemoryStorage{ls: ls}
+ s := &MemoryStorage{ents: ents}
entries, err := s.Entries(tt.lo, tt.hi, tt.maxsize)
require.Equal(t, tt.werr, err)
require.Equal(t, tt.wentries, entries)
@@ -94,21 +92,23 @@ func TestStorageEntries(t *testing.T) {
}
func TestStorageLastIndex(t *testing.T) {
- s := &MemoryStorage{ls: entryID{index: 3, term: 3}.append(4, 5)}
+ ents := index(3).terms(3, 4, 5)
+ s := &MemoryStorage{ents: ents}
require.Equal(t, uint64(5), s.LastIndex())
require.NoError(t, s.Append(index(6).terms(5)))
require.Equal(t, uint64(6), s.LastIndex())
}
func TestStorageFirstIndex(t *testing.T) {
- s := &MemoryStorage{ls: entryID{index: 3, term: 3}.append(4, 5)}
+ ents := index(3).terms(3, 4, 5)
+ s := &MemoryStorage{ents: ents}
require.Equal(t, uint64(4), s.FirstIndex())
require.NoError(t, s.Compact(4))
require.Equal(t, uint64(5), s.FirstIndex())
}
func TestStorageCompact(t *testing.T) {
- ls := entryID{index: 3, term: 3}.append(4, 5)
+ ents := index(3).terms(3, 4, 5)
tests := []struct {
i uint64
@@ -117,25 +117,25 @@ func TestStorageCompact(t *testing.T) {
wterm uint64
wlen int
}{
- {2, ErrCompacted, 3, 3, 2},
- {3, ErrCompacted, 3, 3, 2},
- {4, nil, 4, 4, 1},
- {5, nil, 5, 5, 0},
+ {2, ErrCompacted, 3, 3, 3},
+ {3, ErrCompacted, 3, 3, 3},
+ {4, nil, 4, 4, 2},
+ {5, nil, 5, 5, 1},
}
for _, tt := range tests {
t.Run("", func(t *testing.T) {
- s := &MemoryStorage{ls: ls}
+ s := &MemoryStorage{ents: ents}
require.Equal(t, tt.werr, s.Compact(tt.i))
- require.Equal(t, tt.windex, s.ls.prev.index)
- require.Equal(t, tt.wterm, s.ls.prev.term)
- require.Equal(t, tt.wlen, len(s.ls.entries))
+ require.Equal(t, tt.windex, s.ents[0].Index)
+ require.Equal(t, tt.wterm, s.ents[0].Term)
+ require.Equal(t, tt.wlen, len(s.ents))
})
}
}
func TestStorageCreateSnapshot(t *testing.T) {
- ls := entryID{index: 3, term: 3}.append(4, 5)
+ ents := index(3).terms(3, 4, 5)
cs := &pb.ConfState{Voters: []pb.PeerID{1, 2, 3}}
data := []byte("data")
@@ -151,7 +151,7 @@ func TestStorageCreateSnapshot(t *testing.T) {
for _, tt := range tests {
t.Run("", func(t *testing.T) {
- s := &MemoryStorage{ls: ls}
+ s := &MemoryStorage{ents: ents}
snap, err := s.CreateSnapshot(tt.i, cs, data)
require.Equal(t, tt.werr, err)
require.Equal(t, tt.wsnap, snap)
@@ -160,7 +160,7 @@ func TestStorageCreateSnapshot(t *testing.T) {
}
func TestStorageAppend(t *testing.T) {
- ls := entryID{index: 3, term: 3}.append(4, 5)
+ ents := index(3).terms(3, 4, 5)
tests := []struct {
entries []pb.Entry
@@ -169,43 +169,49 @@ func TestStorageAppend(t *testing.T) {
}{
{
index(1).terms(1, 2),
- ErrCompacted,
- index(4).terms(4, 5),
+ nil,
+ index(3).terms(3, 4, 5),
},
{
index(3).terms(3, 4, 5),
- ErrCompacted,
- index(4).terms(4, 5),
+ nil,
+ index(3).terms(3, 4, 5),
},
{
- index(4).terms(6, 6),
+ index(3).terms(3, 6, 6),
nil,
- index(4).terms(6, 6),
+ index(3).terms(3, 6, 6),
},
{
- index(4).terms(4, 5, 5),
+ index(3).terms(3, 4, 5, 5),
nil,
- index(4).terms(4, 5, 5),
+ index(3).terms(3, 4, 5, 5),
+ },
+ // Truncate incoming entries, truncate the existing entries and append.
+ {
+ index(2).terms(3, 3, 5),
+ nil,
+ index(3).terms(3, 5),
},
// Truncate the existing entries and append.
{
index(4).terms(5),
nil,
- index(4).terms(5),
+ index(3).terms(3, 5),
},
// Direct append.
{
index(6).terms(5),
nil,
- index(4).terms(4, 5, 5),
+ index(3).terms(3, 4, 5, 5),
},
}
for _, tt := range tests {
t.Run("", func(t *testing.T) {
- s := &MemoryStorage{ls: ls}
+ s := &MemoryStorage{ents: ents}
require.Equal(t, tt.werr, s.Append(tt.entries))
- require.Equal(t, tt.wentries, s.ls.entries)
+ require.Equal(t, tt.wentries, s.ents)
})
}
}
@@ -229,24 +235,3 @@ func TestStorageApplySnapshot(t *testing.T) {
tt = tests[i]
require.Equal(t, ErrSnapOutOfDate, s.ApplySnapshot(tt))
}
-
-func TestStorageLogSnapshot(t *testing.T) {
- s := NewMemoryStorage()
- require.NoError(t, s.Append(index(1).terms(1, 2, 3)))
- snap := s.LogSnapshot()
- // The snapshot must be immutable regardless of mutations on the storage.
- check := func() {
- require.Equal(t, uint64(1), snap.FirstIndex())
- require.Equal(t, uint64(3), snap.LastIndex())
- entries, err := snap.Entries(snap.FirstIndex(), snap.LastIndex()+1, math.MaxUint64)
- require.NoError(t, err)
- require.Equal(t, index(1).terms(1, 2, 3), entries)
- }
- check()
- require.NoError(t, s.Append(index(4).terms(4, 5))) // regular append
- check()
- require.NoError(t, s.Append(index(2).terms(7, 7, 7))) // truncation and append
- check()
- require.NoError(t, s.Compact(4)) // compaction
- check()
-}
diff --git a/pkg/raft/tracker/fortificationtracker.go b/pkg/raft/tracker/fortificationtracker.go
index b8a889f3f697..2061ea1a4378 100644
--- a/pkg/raft/tracker/fortificationtracker.go
+++ b/pkg/raft/tracker/fortificationtracker.go
@@ -164,26 +164,31 @@ func (ft *FortificationTracker) computeLeadSupportUntil(state pb.StateType) hlc.
if state != pb.StateLeader {
panic("computeLeadSupportUntil should only be called by the leader")
}
+ if len(ft.fortification) == 0 {
+ return hlc.Timestamp{} // fast-path for no fortification
+ }
// TODO(arul): avoid this map allocation as we're calling LeadSupportUntil
// from hot paths.
supportExpMap := make(map[pb.PeerID]hlc.Timestamp)
- for id, supportEpoch := range ft.fortification {
- curEpoch, curExp := ft.storeLiveness.SupportFrom(id)
- // NB: We can't assert that supportEpoch <= curEpoch because there may be a
- // race between a successful MsgFortifyLeaderResp and the store liveness
- // heartbeat response that lets the leader know the follower's store is
- // supporting the leader's store at the epoch in the MsgFortifyLeaderResp
- // message.
- if curEpoch == supportEpoch {
- supportExpMap[id] = curExp
+ ft.config.Voters.Visit(func(id pb.PeerID) {
+ if supportEpoch, ok := ft.fortification[id]; ok {
+ curEpoch, curExp := ft.storeLiveness.SupportFrom(id)
+ // NB: We can't assert that supportEpoch <= curEpoch because there may be
+ // a race between a successful MsgFortifyLeaderResp and the store liveness
+ // heartbeat response that lets the leader know the follower's store is
+ // supporting the leader's store at the epoch in the MsgFortifyLeaderResp
+ // message.
+ if curEpoch == supportEpoch {
+ supportExpMap[id] = curExp
+ }
}
- }
+ })
return ft.config.Voters.LeadSupportExpiration(supportExpMap)
}
// CanDefortify returns whether the caller can safely[1] de-fortify the term
-// based on the sate tracked by the FortificationTracker.
+// based on the state tracked by the FortificationTracker.
//
// [1] Without risking regressions in the maximum that's ever been indicated to
// the layers above. Or, more simply, without risking regression of leader
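For reference, one observable effect of iterating the voter set (via Visit) rather than the fortification map is that support recorded from peers that are no longer voters is ignored. A toy sketch with hypothetical types, not the tracker package API:

    package main

    import "fmt"

    type peerID uint64

    // visitVoters stands in for JointConfig.Visit: it calls f once per voter ID.
    func visitVoters(voters []peerID, f func(peerID)) {
        for _, id := range voters {
            f(id)
        }
    }

    func main() {
        // Fortification was recorded from peers 1, 2 and 4, but peer 4 has since
        // left the voter configuration; iterating the voter set skips it.
        fortification := map[peerID]int64{1: 7, 2: 7, 4: 6}
        voters := []peerID{1, 2, 3}
        supporting := 0
        visitVoters(voters, func(id peerID) {
            if _, ok := fortification[id]; ok {
                supporting++
            }
        })
        fmt.Println(supporting) // 2: only fortifying peers that are still voters count
    }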
diff --git a/pkg/raft/types.go b/pkg/raft/types.go
index 30cd3ab0f645..20e734563e8b 100644
--- a/pkg/raft/types.go
+++ b/pkg/raft/types.go
@@ -90,10 +90,6 @@ func (l LogMark) After(other LogMark) bool {
// is sourced from a message that was received via transport, or from Storage,
// or in a test code that manually hard-codes this struct. In these cases, the
// invariants should be validated using the valid() method.
-//
-// The LogSlice is immutable. The entries slice must not be mutated, but it can
-// be appended to in some cases, when the callee protects its underlying slice
-// by capping the returned entries slice with a full slice expression.
type LogSlice struct {
// term is the leader term containing the given entries in its log.
term uint64
@@ -103,6 +99,15 @@ type LogSlice struct {
entries []pb.Entry
}
+// MakeLogSlice creates a fake log slice containing the supplied entries. Only
+// for testing.
+//
+// TODO(pav-kv): this is not a correct LogSlice. Remove this function, and help
+// construct a correct one.
+func MakeLogSlice(entries []pb.Entry) LogSlice {
+ return LogSlice{entries: entries}
+}
+
// Entries returns the log entries covered by this slice. The returned slice
// must not be mutated.
func (s LogSlice) Entries() []pb.Entry {
diff --git a/pkg/raft/util.go b/pkg/raft/util.go
index 2f86a7d651ff..dfad989b062b 100644
--- a/pkg/raft/util.go
+++ b/pkg/raft/util.go
@@ -200,10 +200,6 @@ func describeMessageWithIndent(indent string, m pb.Message, f EntryFormatter) st
return buf.String()
}
-func DescribeTarget(id pb.PeerID) string {
- return describeTarget(id)
-}
-
func describeTarget(id pb.PeerID) string {
switch id {
case None:
diff --git a/pkg/roachprod/install/files/cockroachdb-logging.yaml b/pkg/roachprod/install/files/cockroachdb-logging.yaml
index 7fdf4e6889bc..ba57e161e919 100644
--- a/pkg/roachprod/install/files/cockroachdb-logging.yaml
+++ b/pkg/roachprod/install/files/cockroachdb-logging.yaml
@@ -35,13 +35,13 @@ sinks:
channels: [STORAGE]
security:
channels: [PRIVILEGES, USER_ADMIN]
- auditable: true
+ auditable: false
sql-audit:
channels: [SENSITIVE_ACCESS]
- auditable: true
+ auditable: false
sql-auth:
channels: [SESSIONS]
- auditable: true
+ auditable: false
sql-exec:
channels: [SQL_EXEC]
sql-slow:
diff --git a/pkg/server/BUILD.bazel b/pkg/server/BUILD.bazel
index d98c78705fb7..6533b62ba3ff 100644
--- a/pkg/server/BUILD.bazel
+++ b/pkg/server/BUILD.bazel
@@ -29,6 +29,7 @@ go_library(
"grpc_gateway.go",
"grpc_server.go",
"hot_ranges.go",
+ "http_metrics.go",
"import_ts.go",
"index_usage_stats.go",
"init.go",
@@ -362,6 +363,7 @@ go_library(
"@com_github_nightlyone_lockfile//:lockfile",
"@com_github_nytimes_gziphandler//:gziphandler",
"@com_github_pires_go_proxyproto//:go-proxyproto",
+ "@com_github_prometheus_client_model//go",
"@com_github_prometheus_common//expfmt",
"@in_gopkg_yaml_v2//:yaml_v2",
"@org_golang_google_grpc//:go_default_library",
@@ -433,6 +435,7 @@ go_test(
"graphite_test.go",
"grpc_gateway_test.go",
"helpers_test.go",
+ "http_metrics_test.go",
"index_usage_stats_test.go",
"job_profiler_test.go",
"listen_and_update_addrs_test.go",
@@ -578,6 +581,7 @@ go_test(
"@com_github_dustin_go_humanize//:go-humanize",
"@com_github_gogo_protobuf//jsonpb",
"@com_github_gogo_protobuf//proto",
+ "@com_github_gorilla_mux//:mux",
"@com_github_grpc_ecosystem_grpc_gateway//runtime:go_default_library",
"@com_github_jackc_pgx_v4//:pgx",
"@com_github_kr_pretty//:pretty",
diff --git a/pkg/server/api_v2.go b/pkg/server/api_v2.go
index ebec7c91a345..1a611cb2713e 100644
--- a/pkg/server/api_v2.go
+++ b/pkg/server/api_v2.go
@@ -103,6 +103,8 @@ func newAPIV2Server(ctx context.Context, opts *apiV2ServerOpts) http.Handler {
allowAnonymous := opts.sqlServer.cfg.Insecure
authMux := authserver.NewV2Mux(authServer, innerMux, allowAnonymous)
outerMux := mux.NewRouter()
+ serverMetrics := NewServerHttpMetrics(opts.sqlServer.MetricsRegistry(), opts.sqlServer.execCfg.Settings)
+ serverMetrics.registerMetricsMiddleware(outerMux)
systemAdmin, saOk := opts.admin.(*systemAdminServer)
systemStatus, ssOk := opts.status.(*systemStatusServer)
diff --git a/pkg/server/http_metrics.go b/pkg/server/http_metrics.go
new file mode 100644
index 000000000000..8c332f80a3a2
--- /dev/null
+++ b/pkg/server/http_metrics.go
@@ -0,0 +1,114 @@
+// Copyright 2024 The Cockroach Authors.
+//
+// Use of this software is governed by the CockroachDB Software License
+// included in the /LICENSE file.
+
+package server
+
+import (
+ "net/http"
+ "regexp"
+ "strconv"
+
+ "github.com/cockroachdb/cockroach/pkg/settings"
+ "github.com/cockroachdb/cockroach/pkg/settings/cluster"
+ "github.com/cockroachdb/cockroach/pkg/util/metric"
+ "github.com/cockroachdb/cockroach/pkg/util/timeutil"
+ "github.com/gorilla/mux"
+ prometheusgo "github.com/prometheus/client_model/go"
+)
+
+const (
+ MethodLabel = "method"
+ PathLabel = "path"
+ StatusCodeLabel = "statusCode"
+)
+
+var pathVarsRegex = regexp.MustCompile("{([A-z]+)(:[^}]*)?}")
+
+var serverHTTPMetricsEnabled = settings.RegisterBoolSetting(
+ settings.ApplicationLevel,
+ "server.http.metrics.enabled",
+ "enables to collection of http metrics",
+ false,
+)
+
+// responseWriter wraps http.ResponseWriter with a statusCode field to provide
+// access to the status code in metric reporting.
+type responseWriter struct {
+ http.ResponseWriter
+ statusCode int
+}
+
+func newResponseWriter(w http.ResponseWriter) *responseWriter {
+ return &responseWriter{w, http.StatusOK}
+}
+
+// WriteHeader implements http.ResponseWriter
+func (rw *responseWriter) WriteHeader(code int) {
+ rw.statusCode = code
+ rw.ResponseWriter.WriteHeader(code)
+}
+
+type HttpServerMetrics struct {
+ RequestMetrics *metric.HistogramVec
+ registry *metric.Registry
+ settings *cluster.Settings
+}
+
+func NewServerHttpMetrics(reg *metric.Registry, settings *cluster.Settings) *HttpServerMetrics {
+ metadata := metric.Metadata{
+ Name: "server.http.request.duration.nanos",
+ Help: "Duration of an HTTP request in nanoseconds.",
+ Measurement: "Duration",
+ Unit: metric.Unit_NANOSECONDS,
+ MetricType: prometheusgo.MetricType_HISTOGRAM,
+ }
+
+ histogramVec := metric.NewExportedHistogramVec(
+ metadata,
+ metric.ResponseTime30sBuckets,
+ []string{MethodLabel, PathLabel, StatusCodeLabel})
+ reg.AddMetric(histogramVec)
+ return &HttpServerMetrics{
+ RequestMetrics: histogramVec,
+ registry: reg,
+ settings: settings,
+ }
+}
+
+// registerMetricsMiddleware registers a middleware function onto the provided mux.Router to
+// capture metrics on http requests. The underlying metric uses a metric.HistogramVec, which
+// isn't recorded in tsdb.
+func (m *HttpServerMetrics) registerMetricsMiddleware(router *mux.Router) {
+ metricsMiddleWare := func(next http.Handler) http.Handler {
+ return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ if !serverHTTPMetricsEnabled.Get(&m.settings.SV) {
+ next.ServeHTTP(w, r)
+ } else {
+ route := mux.CurrentRoute(r)
+ path, _ := route.GetPathTemplate()
+ rw := newResponseWriter(w)
+ sw := timeutil.NewStopWatch()
+ sw.Start()
+ next.ServeHTTP(rw, r)
+ sw.Stop()
+ m.RequestMetrics.Observe(map[string]string{
+ "path": formatPathVars(path),
+ "method": r.Method,
+ "statusCode": strconv.Itoa(rw.statusCode),
+ }, float64(sw.Elapsed().Nanoseconds()))
+ }
+ })
+ }
+ router.Use(metricsMiddleWare)
+}
+
+// formatPathVars replaces named path variables with just the
+// variable name, wrapped in <>. Any variable regex will be
+// removed. For example:
+// "/api/v2/database_metadata/{database_id:[0-9]+}" is
+// turned into" "/api/v2/database_metadata/"
+func formatPathVars(path string) string {
+ return pathVarsRegex.ReplaceAllString(path, "<$1>")
+}
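For reference, a minimal standalone check of the pattern and replacement used by formatPathVars above (assuming the same regexp and "<$1>" replacement string):

    package main

    import (
        "fmt"
        "regexp"
    )

    func main() {
        // Same pattern and replacement as formatPathVars: keep the variable name
        // wrapped in <> and drop any regex constraint after the colon.
        re := regexp.MustCompile("{([A-z]+)(:[^}]*)?}")
        out := re.ReplaceAllString("/api/v2/database_metadata/{database_id:[0-9]+}", "<$1>")
        fmt.Println(out) // /api/v2/database_metadata/<database_id>
    }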
diff --git a/pkg/server/http_metrics_test.go b/pkg/server/http_metrics_test.go
new file mode 100644
index 000000000000..3a0d94c30c05
--- /dev/null
+++ b/pkg/server/http_metrics_test.go
@@ -0,0 +1,262 @@
+// Copyright 2024 The Cockroach Authors.
+//
+// Use of this software is governed by the CockroachDB Software License
+// included in the /LICENSE file.
+
+package server
+
+import (
+ "context"
+ "fmt"
+ "net/http"
+ "net/http/httptest"
+ "strconv"
+ "testing"
+ "time"
+
+ "github.com/cockroachdb/cockroach/pkg/settings/cluster"
+ "github.com/cockroachdb/cockroach/pkg/util/leaktest"
+ "github.com/cockroachdb/cockroach/pkg/util/log"
+ "github.com/cockroachdb/cockroach/pkg/util/metric"
+ "github.com/gorilla/mux"
+ prometheusgo "github.com/prometheus/client_model/go"
+ "github.com/stretchr/testify/require"
+)
+
+func TestRegisterMetricsMiddleware(t *testing.T) {
+ defer leaktest.AfterTest(t)()
+ defer log.Scope(t).Close(t)
+ t.Run("cluster settings", func(t *testing.T) {
+ clusterSettings := cluster.MakeTestingClusterSettings()
+ serverMetrics := NewServerHttpMetrics(metric.NewRegistry(), clusterSettings)
+ router := mux.NewRouter()
+ serverMetrics.registerMetricsMiddleware(router)
+ router.Handle("/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ time.Sleep(time.Millisecond)
+ w.WriteHeader(http.StatusOK)
+ }))
+ server := httptest.NewServer(router)
+ defer server.Close()
+ rr := httptest.NewRecorder()
+ req, err := http.NewRequest("GET", server.URL+"/", nil)
+ require.NoError(t, err)
+
+ router.ServeHTTP(rr, req)
+ metrics := serverMetrics.RequestMetrics.ToPrometheusMetrics()
+ require.Len(t, metrics, 0)
+
+ serverHTTPMetricsEnabled.Override(context.Background(), &clusterSettings.SV, true)
+ router.ServeHTTP(rr, req)
+ metrics = serverMetrics.RequestMetrics.ToPrometheusMetrics()
+ require.Len(t, metrics, 1)
+ assertPrometheusMetrics(t, metrics, map[string]uint64{
+ fmt.Sprintf("%s GET %s", strconv.Itoa(http.StatusOK), "/"): uint64(1),
+ })
+
+ serverHTTPMetricsEnabled.Override(context.Background(), &clusterSettings.SV, false)
+ router.ServeHTTP(rr, req)
+ metrics = serverMetrics.RequestMetrics.ToPrometheusMetrics()
+ require.Len(t, metrics, 1)
+ assertPrometheusMetrics(t, metrics, map[string]uint64{
+ fmt.Sprintf("%s GET %s", strconv.Itoa(http.StatusOK), "/"): uint64(1),
+ })
+
+ })
+ t.Run("metrics", func(t *testing.T) {
+ clusterSettings := cluster.MakeTestingClusterSettings()
+ serverHTTPMetricsEnabled.Override(context.Background(), &clusterSettings.SV, true)
+ serverMetrics := NewServerHttpMetrics(metric.NewRegistry(), clusterSettings)
+ pathUri := "/mypath/{path_var:[0-9]+}/"
+ router := mux.NewRouter()
+ serverMetrics.registerMetricsMiddleware(router)
+ shouldFail := false
+ handlerFunc := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ time.Sleep(time.Millisecond)
+ if shouldFail {
+ w.WriteHeader(http.StatusInternalServerError)
+ } else {
+ w.WriteHeader(http.StatusOK)
+ }
+ })
+ router.Handle(pathUri, handlerFunc).Methods(http.MethodGet, http.MethodPost)
+ server := httptest.NewServer(router)
+ defer server.Close()
+ getReq1, err := http.NewRequest("GET", server.URL+"/mypath/1/", nil)
+ require.NoError(t, err)
+ getReq2, err := http.NewRequest("GET", server.URL+"/mypath/2/", nil)
+ require.NoError(t, err)
+ postReq2, err := http.NewRequest("POST", server.URL+"/mypath/2/", nil)
+ require.NoError(t, err)
+ putReq3, err := http.NewRequest("PUT", server.URL+"/mypath/1/", nil)
+ require.NoError(t, err)
+ rr := httptest.NewRecorder()
+ router.ServeHTTP(rr, getReq1)
+ router.ServeHTTP(rr, getReq1)
+ router.ServeHTTP(rr, getReq2)
+ router.ServeHTTP(rr, postReq2)
+ router.ServeHTTP(rr, putReq3)
+
+ shouldFail = true
+ router.ServeHTTP(rr, postReq2)
+
+ metrics := serverMetrics.RequestMetrics.ToPrometheusMetrics()
+ // putReq3 won't be recorded because `PUT /mypath/1/` isn't a valid route
+ require.Len(t, metrics, 3)
+ assertPrometheusMetrics(t, metrics, map[string]uint64{
+ fmt.Sprintf("%s GET %s", strconv.Itoa(http.StatusOK), formatPathVars(pathUri)): uint64(3),
+ fmt.Sprintf("%s POST %s", strconv.Itoa(http.StatusOK), formatPathVars(pathUri)): uint64(1),
+ fmt.Sprintf("%s POST %s", strconv.Itoa(http.StatusInternalServerError), formatPathVars(pathUri)): uint64(1),
+ })
+ })
+}
+
+func TestFormatPathVars(t *testing.T) {
+ defer leaktest.AfterTest(t)()
+ defer log.Scope(t).Close(t)
+
+ type testcase struct {
+ name string
+ path string
+ expectedPath string
+ }
+ testcases := []testcase{
+ {name: "no variable", path: "/testpath/", expectedPath: "/testpath/"},
+ {name: "variable with regex", path: "/testpath/{param:[0-9]+}/", expectedPath: "/testpath//"},
+ {name: "multiple variables with regex", path: "/testpath/{param:[0-9]+}/{other_param:[\\w]}", expectedPath: "/testpath//"},
+ {name: "variable without regex", path: "/testpath/{param}/", expectedPath: "/testpath//"},
+ {name: "multiple variable without regex", path: "/testpath/{param}/{other_Param}/", expectedPath: "/testpath///"},
+ {name: "mixed variables", path: "/testpath/{param:[\\w]}/{otherParam}", expectedPath: "/testpath//"},
+ }
+
+ for _, tc := range testcases {
+ t.Run(tc.name, func(t *testing.T) {
+ require.Equal(t, tc.expectedPath, formatPathVars(tc.path))
+ })
+ }
+}
+
+func BenchmarkHTTPMetrics(b *testing.B) {
+ defer leaktest.AfterTest(b)()
+ defer log.Scope(b).Close(b)
+
+ b.Run("Metrics enabled", func(b *testing.B) {
+ b.StopTimer()
+ b.ResetTimer()
+ clusterSettings := cluster.MakeTestingClusterSettings()
+ serverHTTPMetricsEnabled.Override(context.Background(), &clusterSettings.SV, true)
+ serverMetrics := NewServerHttpMetrics(metric.NewRegistry(), clusterSettings)
+ server, router := newBenchmarkServer("/{param}/", serverMetrics)
+ defer server.Close()
+ r1, err := http.NewRequest("GET", server.URL+"/1/", nil)
+ require.NoError(b, err)
+ r2, err := http.NewRequest("GET", server.URL+"/2/", nil)
+ require.NoError(b, err)
+ r3, err := http.NewRequest("POST", server.URL+"/2/", nil)
+ require.NoError(b, err)
+ r4, err := http.NewRequest("PUT", server.URL+"/1/", nil)
+ require.NoError(b, err)
+ rr := httptest.NewRecorder()
+ b.StartTimer()
+ for i := 0; i < b.N; i++ {
+ router.ServeHTTP(rr, r1)
+ router.ServeHTTP(rr, r2)
+ router.ServeHTTP(rr, r3)
+ router.ServeHTTP(rr, r4)
+ }
+ require.Len(b, serverMetrics.RequestMetrics.ToPrometheusMetrics(), 2)
+ })
+
+ b.Run("Metrics disabled", func(b *testing.B) {
+ b.StopTimer()
+ b.ResetTimer()
+ clusterSettings := cluster.MakeTestingClusterSettings()
+ serverHTTPMetricsEnabled.Override(context.Background(), &clusterSettings.SV, false)
+ serverMetrics := NewServerHttpMetrics(metric.NewRegistry(), clusterSettings)
+ server, router := newBenchmarkServer("/{param}/", serverMetrics)
+ defer server.Close()
+ r1, err := http.NewRequest("GET", server.URL+"/1/", nil)
+ require.NoError(b, err)
+ r2, err := http.NewRequest("GET", server.URL+"/2/", nil)
+ require.NoError(b, err)
+ r3, err := http.NewRequest("POST", server.URL+"/2/", nil)
+ require.NoError(b, err)
+ r4, err := http.NewRequest("PUT", server.URL+"/1/", nil)
+ require.NoError(b, err)
+ rr := httptest.NewRecorder()
+ b.StartTimer()
+ for i := 0; i < b.N; i++ {
+ router.ServeHTTP(rr, r1)
+ router.ServeHTTP(rr, r2)
+ router.ServeHTTP(rr, r3)
+ router.ServeHTTP(rr, r4)
+ }
+ require.Len(b, serverMetrics.RequestMetrics.ToPrometheusMetrics(), 0)
+ })
+
+ b.Run("No Middleware", func(b *testing.B) {
+ b.StopTimer()
+ b.ResetTimer()
+ server, router := newBenchmarkServer("/{param}/", nil)
+ defer server.Close()
+ r1, err := http.NewRequest("GET", server.URL+"/1/", nil)
+ require.NoError(b, err)
+ r2, err := http.NewRequest("GET", server.URL+"/2/", nil)
+ require.NoError(b, err)
+ r3, err := http.NewRequest("POST", server.URL+"/2/", nil)
+ require.NoError(b, err)
+ r4, err := http.NewRequest("PUT", server.URL+"/1/", nil)
+ require.NoError(b, err)
+ rr := httptest.NewRecorder()
+ b.StartTimer()
+ for i := 0; i < b.N; i++ {
+ router.ServeHTTP(rr, r1)
+ router.ServeHTTP(rr, r2)
+ router.ServeHTTP(rr, r3)
+ router.ServeHTTP(rr, r4)
+ }
+ })
+}
+
+func newBenchmarkServer(
+ route string, serverMetrics *HttpServerMetrics,
+) (*httptest.Server, *mux.Router) {
+ router := mux.NewRouter()
+ router.Handle(route, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ time.Sleep(time.Millisecond)
+ w.WriteHeader(http.StatusOK)
+ })).Methods(http.MethodGet, http.MethodPost)
+ if serverMetrics != nil {
+ serverMetrics.registerMetricsMiddleware(router)
+ }
+ return httptest.NewServer(router), router
+}
+func assertPrometheusMetrics(
+ t *testing.T, metrics []*prometheusgo.Metric, expected map[string]uint64,
+) {
+ t.Helper()
+ actual := map[string]*prometheusgo.Histogram{}
+ for _, m := range metrics {
+ var method, path, statusCode string
+ for _, l := range m.Label {
+ switch *l.Name {
+ case MethodLabel:
+ method = *l.Value
+ case PathLabel:
+ path = *l.Value
+ case StatusCodeLabel:
+ statusCode = *l.Value
+ }
+ }
+ histogram := m.Histogram
+ require.NotNil(t, histogram, "expected histogram")
+ key := fmt.Sprintf("%s %s %s", statusCode, method, path)
+ actual[key] = histogram
+ }
+
+ for key, val := range expected {
+ histogram, ok := actual[key]
+ require.True(t, ok)
+ require.Greater(t, *histogram.SampleSum, float64(0), "expected `%s` to have a SampleSum > 0", key)
+ require.Equal(t, val, *histogram.SampleCount, "expected `%s` to have SampleCount of %d", key, val)
+ }
+}
diff --git a/pkg/sql/alter_default_privileges.go b/pkg/sql/alter_default_privileges.go
index 64683b9c36ae..d47c020fc020 100644
--- a/pkg/sql/alter_default_privileges.go
+++ b/pkg/sql/alter_default_privileges.go
@@ -134,10 +134,12 @@ func (n *alterDefaultPrivilegesNode) startExec(params runParams) error {
return err
}
+ var hasAdmin bool
+ if hasAdmin, err = params.p.HasAdminRole(params.ctx); err != nil {
+ return err
+ }
if n.n.ForAllRoles {
- if hasAdmin, err := params.p.HasAdminRole(params.ctx); err != nil {
- return err
- } else if !hasAdmin {
+ if !hasAdmin {
return pgerror.Newf(pgcode.InsufficientPrivilege,
"only users with the admin role are allowed to ALTER DEFAULT PRIVILEGES FOR ALL ROLES")
}
@@ -145,7 +147,7 @@ func (n *alterDefaultPrivilegesNode) startExec(params runParams) error {
// You can change default privileges only for objects that will be created
// by yourself or by roles that you are a member of.
for _, targetRole := range targetRoles {
- if targetRole != params.p.User() {
+ if targetRole != params.p.User() && !hasAdmin {
memberOf, err := params.p.MemberOfWithAdminOption(params.ctx, params.p.User())
if err != nil {
return err
@@ -153,7 +155,7 @@ func (n *alterDefaultPrivilegesNode) startExec(params runParams) error {
if _, found := memberOf[targetRole]; !found {
return pgerror.Newf(pgcode.InsufficientPrivilege,
- "must be a member of %s", targetRole.Normalized())
+ "must be an admin or member of %s", targetRole.Normalized())
}
}
}
diff --git a/pkg/sql/alter_table.go b/pkg/sql/alter_table.go
index 924d6b5316ab..861d979b035c 100644
--- a/pkg/sql/alter_table.go
+++ b/pkg/sql/alter_table.go
@@ -2330,8 +2330,17 @@ func checkSchemaChangeIsAllowed(desc catalog.TableDescriptor, n tree.Statement)
if desc.IsSchemaLocked() && !tree.IsSetOrResetSchemaLocked(n) {
return sqlerrors.NewSchemaChangeOnLockedTableErr(desc.GetName())
}
- if len(desc.TableDesc().LDRJobIDs) > 0 && !tree.IsAllowedLDRSchemaChange(n) {
- return sqlerrors.NewDisallowedSchemaChangeOnLDRTableErr(desc.GetName(), desc.TableDesc().LDRJobIDs)
+ if len(desc.TableDesc().LDRJobIDs) > 0 {
+ var virtualColNames []string
+ for _, col := range desc.NonDropColumns() {
+ if col.IsVirtual() {
+ virtualColNames = append(virtualColNames, col.GetName())
+ }
+ }
+ if !tree.IsAllowedLDRSchemaChange(n, virtualColNames) {
+ return sqlerrors.NewDisallowedSchemaChangeOnLDRTableErr(desc.GetName(), desc.TableDesc().LDRJobIDs)
+
+ }
}
return nil
}
diff --git a/pkg/sql/catalog/bootstrap/testdata/testdata b/pkg/sql/catalog/bootstrap/testdata/testdata
index 5cffbf9621e9..c720aa8c9f7e 100644
--- a/pkg/sql/catalog/bootstrap/testdata/testdata
+++ b/pkg/sql/catalog/bootstrap/testdata/testdata
@@ -1,7 +1,7 @@
-system hash=f93eb889512719710d1c75bff8a77ce6ad6c4e837319053f6f149ca13749d710
+system hash=f02637ca2ab3fa50efc1a4884f2406b8a0aad72a3f6249c5461e6e922a2e2491
----
[{"key":"8b"}
-,{"key":"8b89898a89","value":"0312470a0673797374656d10011a250a0d0a0561646d696e1080101880100a0c0a04726f6f7410801018801012046e6f646518032200280140004a006a0a08d8843d1002180020167000"}
+,{"key":"8b89898a89","value":"0312450a0673797374656d10011a250a0d0a0561646d696e1080101880100a0c0a04726f6f7410801018801012046e6f646518032200280140004a006a0808181002180020167000"}
,{"key":"8b898b8a89","value":"030a94030a0a64657363726970746f721803200128013a0042270a02696410011a0c08011040180030005014600020003000680070007800800100880100980100422f0a0a64657363726970746f7210021a0c08081000180030005011600020013000680070007800800100880100980100480352710a077072696d61727910011801220269642a0a64657363726970746f72300140004a10080010001a00200028003000380040005a0070027a0408002000800100880100900104980101a20106080012001800a80100b20100ba0100c00100c80100d00101e00100e901000000000000000060026a210a0b0a0561646d696e102018200a0a0a04726f6f741020182012046e6f64651803800101880103980100b201130a077072696d61727910001a02696420012800b201240a1066616d5f325f64657363726970746f7210021a0a64657363726970746f7220022802b80103c20100e80100f2010408001200f801008002009202009a0200b20200b80200c0021dc80200e00200800300880302a80300b00300d00300d80300e00300f80300880400"}
,{"key":"8b898c8a89","value":"030acd050a0575736572731804200128013a00422d0a08757365726e616d6510011a0c0807100018003000501960002000300068007000780080010088010098010042330a0e68617368656450617373776f726410021a0c0808100018003000501160002001300068007000780080010088010098010042320a066973526f6c6510031a0c08001000180030005010600020002a0566616c73653000680070007800800100880100980100422c0a07757365725f696410041a0c080c100018003000501a60002000300068007000780080010088010098010048055290010a077072696d617279100118012208757365726e616d652a0e68617368656450617373776f72642a066973526f6c652a07757365725f6964300140004a10080010001a00200028003000380040005a007002700370047a0408002000800100880100900104980101a20106080012001800a80100b20100ba0100c00100c80100d00102e00100e90100000000000000005a740a1175736572735f757365725f69645f696478100218012207757365725f69643004380140004a10080010001a00200028003000380040005a007a0408002000800100880100900103980100a20106080012001800a80100b20100ba0100c00100c80100d00101e00100e901000000000000000060036a250a0d0a0561646d696e10e00318e0030a0c0a04726f6f7410e00318e00312046e6f64651803800101880103980100b201240a077072696d61727910001a08757365726e616d651a07757365725f6964200120042804b2012c0a1466616d5f325f68617368656450617373776f726410021a0e68617368656450617373776f726420022802b2011c0a0c66616d5f335f6973526f6c6510031a066973526f6c6520032803b80104c20100e80100f2010408001200f801008002009202009a0200b20200b80200c0021dc80200e00200800300880303a80300b00300d00300d80300e00300f80300880400"}
,{"key":"8b898d8a89","value":"030a83030a057a6f6e65731805200128013a0042270a02696410011a0c08011040180030005014600020003000680070007800800100880100980100422b0a06636f6e66696710021a0c080810001800300050116000200130006800700078008001008801009801004803526d0a077072696d61727910011801220269642a06636f6e666967300140004a10080010001a00200028003000380040005a0070027a0408002000800100880100900104980101a20106080012001800a80100b20100ba0100c00100c80100d00101e00100e901000000000000000060026a250a0d0a0561646d696e10e00318e0030a0c0a04726f6f7410e00318e00312046e6f64651803800101880103980100b201130a077072696d61727910001a02696420012800b2011c0a0c66616d5f325f636f6e66696710021a06636f6e66696720022802b80103c20100e80100f2010408001200f801008002009202009a0200b20200b80200c0021dc80200e00200800300880302a80300b00300d00300d80300e00300f80300880400"}
@@ -198,10 +198,10 @@ system hash=f93eb889512719710d1c75bff8a77ce6ad6c4e837319053f6f149ca13749d710
,{"key":"cb"}
]
-tenant hash=ec31fb2e5b85fbb8da0beded6f174ff0a8196088aebe8bf5cdeacb07689b6d6a
+tenant hash=e025f38b283dfb401584c95355095420047d10496ec2e9bf009b4a7d8fd09b5c
----
[{"key":""}
-,{"key":"8b89898a89","value":"0312470a0673797374656d10011a250a0d0a0561646d696e1080101880100a0c0a04726f6f7410801018801012046e6f646518032200280140004a006a0a08d8843d1002180020167000"}
+,{"key":"8b89898a89","value":"0312450a0673797374656d10011a250a0d0a0561646d696e1080101880100a0c0a04726f6f7410801018801012046e6f646518032200280140004a006a0808181002180020167000"}
,{"key":"8b898b8a89","value":"030a94030a0a64657363726970746f721803200128013a0042270a02696410011a0c08011040180030005014600020003000680070007800800100880100980100422f0a0a64657363726970746f7210021a0c08081000180030005011600020013000680070007800800100880100980100480352710a077072696d61727910011801220269642a0a64657363726970746f72300140004a10080010001a00200028003000380040005a0070027a0408002000800100880100900104980101a20106080012001800a80100b20100ba0100c00100c80100d00101e00100e901000000000000000060026a210a0b0a0561646d696e102018200a0a0a04726f6f741020182012046e6f64651803800101880103980100b201130a077072696d61727910001a02696420012800b201240a1066616d5f325f64657363726970746f7210021a0a64657363726970746f7220022802b80103c20100e80100f2010408001200f801008002009202009a0200b20200b80200c0021dc80200e00200800300880302a80300b00300d00300d80300e00300f80300880400"}
,{"key":"8b898c8a89","value":"030acd050a0575736572731804200128013a00422d0a08757365726e616d6510011a0c0807100018003000501960002000300068007000780080010088010098010042330a0e68617368656450617373776f726410021a0c0808100018003000501160002001300068007000780080010088010098010042320a066973526f6c6510031a0c08001000180030005010600020002a0566616c73653000680070007800800100880100980100422c0a07757365725f696410041a0c080c100018003000501a60002000300068007000780080010088010098010048055290010a077072696d617279100118012208757365726e616d652a0e68617368656450617373776f72642a066973526f6c652a07757365725f6964300140004a10080010001a00200028003000380040005a007002700370047a0408002000800100880100900104980101a20106080012001800a80100b20100ba0100c00100c80100d00102e00100e90100000000000000005a740a1175736572735f757365725f69645f696478100218012207757365725f69643004380140004a10080010001a00200028003000380040005a007a0408002000800100880100900103980100a20106080012001800a80100b20100ba0100c00100c80100d00101e00100e901000000000000000060036a250a0d0a0561646d696e10e00318e0030a0c0a04726f6f7410e00318e00312046e6f64651803800101880103980100b201240a077072696d61727910001a08757365726e616d651a07757365725f6964200120042804b2012c0a1466616d5f325f68617368656450617373776f726410021a0e68617368656450617373776f726420022802b2011c0a0c66616d5f335f6973526f6c6510031a066973526f6c6520032803b80104c20100e80100f2010408001200f801008002009202009a0200b20200b80200c0021dc80200e00200800300880303a80300b00300d00300d80300e00300f80300880400"}
,{"key":"8b898d8a89","value":"030a83030a057a6f6e65731805200128013a0042270a02696410011a0c08011040180030005014600020003000680070007800800100880100980100422b0a06636f6e66696710021a0c080810001800300050116000200130006800700078008001008801009801004803526d0a077072696d61727910011801220269642a06636f6e666967300140004a10080010001a00200028003000380040005a0070027a0408002000800100880100900104980101a20106080012001800a80100b20100ba0100c00100c80100d00101e00100e901000000000000000060026a250a0d0a0561646d696e10e00318e0030a0c0a04726f6f7410e00318e00312046e6f64651803800101880103980100b201130a077072696d61727910001a02696420012800b2011c0a0c66616d5f325f636f6e66696710021a06636f6e66696720022802b80103c20100e80100f2010408001200f801008002009202009a0200b20200b80200c0021dc80200e00200800300880302a80300b00300d00300d80300e00300f80300880400"}
diff --git a/pkg/sql/catalog/systemschema_test/testdata/bootstrap_system b/pkg/sql/catalog/systemschema_test/testdata/bootstrap_system
index c4269bef4d09..5f2f1074e4c2 100644
--- a/pkg/sql/catalog/systemschema_test/testdata/bootstrap_system
+++ b/pkg/sql/catalog/systemschema_test/testdata/bootstrap_system
@@ -674,7 +674,7 @@ schema_telemetry
----
{"database":{"name":"defaultdb","id":100,"modificationTime":{"wallTime":"0"},"version":"1","privileges":{"users":[{"userProto":"admin","privileges":"2","withGrantOption":"2"},{"userProto":"public","privileges":"2048"},{"userProto":"root","privileges":"2","withGrantOption":"2"}],"ownerProto":"root","version":3},"schemas":{"public":{"id":101}},"defaultPrivileges":{}}}
{"database":{"name":"postgres","id":102,"modificationTime":{"wallTime":"0"},"version":"1","privileges":{"users":[{"userProto":"admin","privileges":"2","withGrantOption":"2"},{"userProto":"public","privileges":"2048"},{"userProto":"root","privileges":"2","withGrantOption":"2"}],"ownerProto":"root","version":3},"schemas":{"public":{"id":103}},"defaultPrivileges":{}}}
-{"database":{"name":"system","id":1,"modificationTime":{"wallTime":"0"},"version":"1","privileges":{"users":[{"userProto":"admin","privileges":"2048","withGrantOption":"2048"},{"userProto":"root","privileges":"2048","withGrantOption":"2048"}],"ownerProto":"node","version":3},"systemDatabaseSchemaVersion":{"majorVal":1000024,"minorVal":2,"internal":22}}}
+{"database":{"name":"system","id":1,"modificationTime":{"wallTime":"0"},"version":"1","privileges":{"users":[{"userProto":"admin","privileges":"2048","withGrantOption":"2048"},{"userProto":"root","privileges":"2048","withGrantOption":"2048"}],"ownerProto":"node","version":3},"systemDatabaseSchemaVersion":{"majorVal":24,"minorVal":2,"internal":22}}}
{"table":{"name":"comments","id":24,"version":"1","modificationTime":{},"parentId":1,"unexposedParentSchemaId":29,"columns":[{"name":"type","id":1,"type":{"family":"IntFamily","width":64,"oid":20}},{"name":"object_id","id":2,"type":{"family":"IntFamily","width":64,"oid":20}},{"name":"sub_id","id":3,"type":{"family":"IntFamily","width":64,"oid":20}},{"name":"comment","id":4,"type":{"family":"StringFamily","oid":25}}],"nextColumnId":5,"families":[{"name":"primary","columnNames":["type","object_id","sub_id"],"columnIds":[1,2,3]},{"name":"fam_4_comment","id":4,"columnNames":["comment"],"columnIds":[4],"defaultColumnId":4}],"nextFamilyId":5,"primaryIndex":{"name":"primary","id":1,"unique":true,"version":4,"keyColumnNames":["type","object_id","sub_id"],"keyColumnDirections":["ASC","ASC","ASC"],"storeColumnNames":["comment"],"keyColumnIds":[1,2,3],"storeColumnIds":[4],"foreignKey":{},"interleave":{},"partitioning":{},"encodingType":1,"sharded":{},"geoConfig":{},"constraintId":1},"nextIndexId":2,"privileges":{"users":[{"userProto":"admin","privileges":"480","withGrantOption":"480"},{"userProto":"public","privileges":"32"},{"userProto":"root","privileges":"480","withGrantOption":"480"}],"ownerProto":"node","version":3},"nextMutationId":1,"formatVersion":3,"replacementOf":{"time":{}},"createAsOfTime":{},"nextConstraintId":2}}
{"table":{"name":"database_role_settings","id":44,"version":"1","modificationTime":{},"parentId":1,"unexposedParentSchemaId":29,"columns":[{"name":"database_id","id":1,"type":{"family":"OidFamily","oid":26}},{"name":"role_name","id":2,"type":{"family":"StringFamily","oid":25}},{"name":"settings","id":3,"type":{"family":"ArrayFamily","arrayElemType":"StringFamily","oid":1009,"arrayContents":{"family":"StringFamily","oid":25}}},{"name":"role_id","id":4,"type":{"family":"OidFamily","oid":26}}],"nextColumnId":5,"families":[{"name":"primary","columnNames":["database_id","role_name","settings","role_id"],"columnIds":[1,2,3,4]}],"nextFamilyId":1,"primaryIndex":{"name":"primary","id":1,"unique":true,"version":4,"keyColumnNames":["database_id","role_name"],"keyColumnDirections":["ASC","ASC"],"storeColumnNames":["settings","role_id"],"keyColumnIds":[1,2],"storeColumnIds":[3,4],"foreignKey":{},"interleave":{},"partitioning":{},"encodingType":1,"sharded":{},"geoConfig":{},"constraintId":2},"indexes":[{"name":"database_role_settings_database_id_role_id_key","id":2,"unique":true,"version":3,"keyColumnNames":["database_id","role_id"],"keyColumnDirections":["ASC","ASC"],"storeColumnNames":["settings"],"keyColumnIds":[1,4],"keySuffixColumnIds":[2],"storeColumnIds":[3],"foreignKey":{},"interleave":{},"partitioning":{},"sharded":{},"geoConfig":{},"constraintId":1}],"nextIndexId":3,"privileges":{"users":[{"userProto":"admin","privileges":"480","withGrantOption":"480"},{"userProto":"root","privileges":"480","withGrantOption":"480"}],"ownerProto":"node","version":3},"nextMutationId":1,"formatVersion":3,"replacementOf":{"time":{}},"createAsOfTime":{},"nextConstraintId":3}}
{"table":{"name":"descriptor","id":3,"version":"1","modificationTime":{},"parentId":1,"unexposedParentSchemaId":29,"columns":[{"name":"id","id":1,"type":{"family":"IntFamily","width":64,"oid":20}},{"name":"descriptor","id":2,"type":{"family":"BytesFamily","oid":17},"nullable":true}],"nextColumnId":3,"families":[{"name":"primary","columnNames":["id"],"columnIds":[1]},{"name":"fam_2_descriptor","id":2,"columnNames":["descriptor"],"columnIds":[2],"defaultColumnId":2}],"nextFamilyId":3,"primaryIndex":{"name":"primary","id":1,"unique":true,"version":4,"keyColumnNames":["id"],"keyColumnDirections":["ASC"],"storeColumnNames":["descriptor"],"keyColumnIds":[1],"storeColumnIds":[2],"foreignKey":{},"interleave":{},"partitioning":{},"encodingType":1,"sharded":{},"geoConfig":{},"constraintId":1},"nextIndexId":2,"privileges":{"users":[{"userProto":"admin","privileges":"32","withGrantOption":"32"},{"userProto":"root","privileges":"32","withGrantOption":"32"}],"ownerProto":"node","version":3},"nextMutationId":1,"formatVersion":3,"replacementOf":{"time":{}},"createAsOfTime":{},"nextConstraintId":2}}
diff --git a/pkg/sql/catalog/systemschema_test/testdata/bootstrap_tenant b/pkg/sql/catalog/systemschema_test/testdata/bootstrap_tenant
index c4269bef4d09..5f2f1074e4c2 100644
--- a/pkg/sql/catalog/systemschema_test/testdata/bootstrap_tenant
+++ b/pkg/sql/catalog/systemschema_test/testdata/bootstrap_tenant
@@ -674,7 +674,7 @@ schema_telemetry
----
{"database":{"name":"defaultdb","id":100,"modificationTime":{"wallTime":"0"},"version":"1","privileges":{"users":[{"userProto":"admin","privileges":"2","withGrantOption":"2"},{"userProto":"public","privileges":"2048"},{"userProto":"root","privileges":"2","withGrantOption":"2"}],"ownerProto":"root","version":3},"schemas":{"public":{"id":101}},"defaultPrivileges":{}}}
{"database":{"name":"postgres","id":102,"modificationTime":{"wallTime":"0"},"version":"1","privileges":{"users":[{"userProto":"admin","privileges":"2","withGrantOption":"2"},{"userProto":"public","privileges":"2048"},{"userProto":"root","privileges":"2","withGrantOption":"2"}],"ownerProto":"root","version":3},"schemas":{"public":{"id":103}},"defaultPrivileges":{}}}
-{"database":{"name":"system","id":1,"modificationTime":{"wallTime":"0"},"version":"1","privileges":{"users":[{"userProto":"admin","privileges":"2048","withGrantOption":"2048"},{"userProto":"root","privileges":"2048","withGrantOption":"2048"}],"ownerProto":"node","version":3},"systemDatabaseSchemaVersion":{"majorVal":1000024,"minorVal":2,"internal":22}}}
+{"database":{"name":"system","id":1,"modificationTime":{"wallTime":"0"},"version":"1","privileges":{"users":[{"userProto":"admin","privileges":"2048","withGrantOption":"2048"},{"userProto":"root","privileges":"2048","withGrantOption":"2048"}],"ownerProto":"node","version":3},"systemDatabaseSchemaVersion":{"majorVal":24,"minorVal":2,"internal":22}}}
{"table":{"name":"comments","id":24,"version":"1","modificationTime":{},"parentId":1,"unexposedParentSchemaId":29,"columns":[{"name":"type","id":1,"type":{"family":"IntFamily","width":64,"oid":20}},{"name":"object_id","id":2,"type":{"family":"IntFamily","width":64,"oid":20}},{"name":"sub_id","id":3,"type":{"family":"IntFamily","width":64,"oid":20}},{"name":"comment","id":4,"type":{"family":"StringFamily","oid":25}}],"nextColumnId":5,"families":[{"name":"primary","columnNames":["type","object_id","sub_id"],"columnIds":[1,2,3]},{"name":"fam_4_comment","id":4,"columnNames":["comment"],"columnIds":[4],"defaultColumnId":4}],"nextFamilyId":5,"primaryIndex":{"name":"primary","id":1,"unique":true,"version":4,"keyColumnNames":["type","object_id","sub_id"],"keyColumnDirections":["ASC","ASC","ASC"],"storeColumnNames":["comment"],"keyColumnIds":[1,2,3],"storeColumnIds":[4],"foreignKey":{},"interleave":{},"partitioning":{},"encodingType":1,"sharded":{},"geoConfig":{},"constraintId":1},"nextIndexId":2,"privileges":{"users":[{"userProto":"admin","privileges":"480","withGrantOption":"480"},{"userProto":"public","privileges":"32"},{"userProto":"root","privileges":"480","withGrantOption":"480"}],"ownerProto":"node","version":3},"nextMutationId":1,"formatVersion":3,"replacementOf":{"time":{}},"createAsOfTime":{},"nextConstraintId":2}}
{"table":{"name":"database_role_settings","id":44,"version":"1","modificationTime":{},"parentId":1,"unexposedParentSchemaId":29,"columns":[{"name":"database_id","id":1,"type":{"family":"OidFamily","oid":26}},{"name":"role_name","id":2,"type":{"family":"StringFamily","oid":25}},{"name":"settings","id":3,"type":{"family":"ArrayFamily","arrayElemType":"StringFamily","oid":1009,"arrayContents":{"family":"StringFamily","oid":25}}},{"name":"role_id","id":4,"type":{"family":"OidFamily","oid":26}}],"nextColumnId":5,"families":[{"name":"primary","columnNames":["database_id","role_name","settings","role_id"],"columnIds":[1,2,3,4]}],"nextFamilyId":1,"primaryIndex":{"name":"primary","id":1,"unique":true,"version":4,"keyColumnNames":["database_id","role_name"],"keyColumnDirections":["ASC","ASC"],"storeColumnNames":["settings","role_id"],"keyColumnIds":[1,2],"storeColumnIds":[3,4],"foreignKey":{},"interleave":{},"partitioning":{},"encodingType":1,"sharded":{},"geoConfig":{},"constraintId":2},"indexes":[{"name":"database_role_settings_database_id_role_id_key","id":2,"unique":true,"version":3,"keyColumnNames":["database_id","role_id"],"keyColumnDirections":["ASC","ASC"],"storeColumnNames":["settings"],"keyColumnIds":[1,4],"keySuffixColumnIds":[2],"storeColumnIds":[3],"foreignKey":{},"interleave":{},"partitioning":{},"sharded":{},"geoConfig":{},"constraintId":1}],"nextIndexId":3,"privileges":{"users":[{"userProto":"admin","privileges":"480","withGrantOption":"480"},{"userProto":"root","privileges":"480","withGrantOption":"480"}],"ownerProto":"node","version":3},"nextMutationId":1,"formatVersion":3,"replacementOf":{"time":{}},"createAsOfTime":{},"nextConstraintId":3}}
{"table":{"name":"descriptor","id":3,"version":"1","modificationTime":{},"parentId":1,"unexposedParentSchemaId":29,"columns":[{"name":"id","id":1,"type":{"family":"IntFamily","width":64,"oid":20}},{"name":"descriptor","id":2,"type":{"family":"BytesFamily","oid":17},"nullable":true}],"nextColumnId":3,"families":[{"name":"primary","columnNames":["id"],"columnIds":[1]},{"name":"fam_2_descriptor","id":2,"columnNames":["descriptor"],"columnIds":[2],"defaultColumnId":2}],"nextFamilyId":3,"primaryIndex":{"name":"primary","id":1,"unique":true,"version":4,"keyColumnNames":["id"],"keyColumnDirections":["ASC"],"storeColumnNames":["descriptor"],"keyColumnIds":[1],"storeColumnIds":[2],"foreignKey":{},"interleave":{},"partitioning":{},"encodingType":1,"sharded":{},"geoConfig":{},"constraintId":1},"nextIndexId":2,"privileges":{"users":[{"userProto":"admin","privileges":"32","withGrantOption":"32"},{"userProto":"root","privileges":"32","withGrantOption":"32"}],"ownerProto":"node","version":3},"nextMutationId":1,"formatVersion":3,"replacementOf":{"time":{}},"createAsOfTime":{},"nextConstraintId":2}}
diff --git a/pkg/sql/catalog/tabledesc/logical_replication_helpers.go b/pkg/sql/catalog/tabledesc/logical_replication_helpers.go
index d9f36606d9c7..75a48d0627b8 100644
--- a/pkg/sql/catalog/tabledesc/logical_replication_helpers.go
+++ b/pkg/sql/catalog/tabledesc/logical_replication_helpers.go
@@ -6,6 +6,7 @@
package tabledesc
import (
+ "bytes"
"cmp"
"slices"
"strings"
@@ -14,6 +15,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
+ "github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/errors"
)
@@ -52,7 +54,31 @@ func CheckLogicalReplicationCompatibility(
return pgerror.Wrapf(err, pgcode.InvalidTableDefinition, cannotLDRMsg)
}
}
+ if err := checkOutboundReferences(dst); err != nil {
+ return pgerror.Wrapf(err, pgcode.InvalidTableDefinition, cannotLDRMsg)
+ }
+
+ return nil
+}
+// checkOutboundReferences verifies that the table descriptor does not
+// reference any user-defined functions, sequences, or triggers.
+func checkOutboundReferences(dst *descpb.TableDescriptor) error {
+ for _, col := range dst.Columns {
+ if len(col.UsesSequenceIds) > 0 {
+ return errors.Newf("table %s references sequences with IDs %v", dst.Name, col.UsesSequenceIds)
+ }
+ if len(col.UsesFunctionIds) > 0 {
+ return errors.Newf("table %s references functions with IDs %v", dst.Name, col.UsesFunctionIds)
+ }
+ }
+ if len(dst.Triggers) > 0 {
+ triggerNames := make([]string, len(dst.Triggers))
+ for i, trigger := range dst.Triggers {
+ triggerNames[i] = trigger.Name
+ }
+ return errors.Newf("table %s references triggers [%s]", dst.Name, strings.Join(triggerNames, ", "))
+ }
return nil
}
@@ -179,20 +205,76 @@ func checkSrcDstColsMatch(src *descpb.TableDescriptor, dst *descpb.TableDescript
)
}
- if dstCol.Type.UserDefined() {
+ if err := checkTypesMatch(srcCol.Type, dstCol.Type); err != nil {
+ return errors.Wrapf(err,
+ "destination table %s column %s has type %s, but the source table %s has type %s",
+ dst.Name, dstCol.Name, dstCol.Type.SQLStringForError(), src.Name, srcCol.Type.SQLStringForError(),
+ )
+ }
+ }
+ return nil
+}
+
+// checkTypesMatch checks that the source and destination types match. Enums
+// need to be equal in both physical and logical representations.
+func checkTypesMatch(srcTyp *types.T, dstTyp *types.T) error {
+ switch {
+ case dstTyp.TypeMeta.EnumData != nil:
+ if srcTyp.TypeMeta.EnumData == nil {
return errors.Newf(
- "destination table %s column %s has user-defined type %s",
- dst.Name, dstCol.Name, dstCol.Type.SQLStringForError(),
+ "destination type %s is an ENUM, but the source type %s is not",
+ dstTyp.SQLStringForError(), srcTyp.SQLStringForError(),
+ )
+ }
+ if !slices.Equal(srcTyp.TypeMeta.EnumData.LogicalRepresentations, dstTyp.TypeMeta.EnumData.LogicalRepresentations) {
+ return errors.Newf(
+ "destination type %s has logical representations %v, but the source type %s has %v",
+ dstTyp.SQLStringForError(), dstTyp.TypeMeta.EnumData.LogicalRepresentations,
+ srcTyp.SQLStringForError(), srcTyp.TypeMeta.EnumData.LogicalRepresentations,
+ )
+ }
+ if !slices.EqualFunc(
+ srcTyp.TypeMeta.EnumData.PhysicalRepresentations, dstTyp.TypeMeta.EnumData.PhysicalRepresentations,
+ func(x, y []byte) bool { return bytes.Equal(x, y) },
+ ) {
+ return errors.Newf(
+ "destination type %s and source type %s have mismatched physical representations",
+ dstTyp.SQLStringForError(), srcTyp.SQLStringForError(),
)
}
- if !srcCol.Type.Identical(dstCol.Type) {
+ case len(dstTyp.TupleContents()) > 0:
+ if len(srcTyp.TupleContents()) == 0 {
return errors.Newf(
- "destination table %s column %s has type %s, but the source table %s has type %s",
- dst.Name, dstCol.Name, dstCol.Type.SQLStringForError(), src.Name, srcCol.Type.SQLStringForError(),
+ "destination type %s is a tuple, but the source type %s is not",
+ dstTyp.SQLStringForError(), srcTyp.SQLStringForError(),
+ )
+ }
+ if len(dstTyp.TupleContents()) != len(srcTyp.TupleContents()) {
+ return errors.Newf(
+ "destination type %s has %d tuple elements, but the source type %s has %d tuple elements",
+ dstTyp.SQLStringForError(), len(dstTyp.TupleContents()),
+ srcTyp.SQLStringForError(), len(srcTyp.TupleContents()),
+ )
+ }
+ for i := range dstTyp.TupleContents() {
+ if err := checkTypesMatch(srcTyp.TupleContents()[i], dstTyp.TupleContents()[i]); err != nil {
+ return errors.Wrapf(err,
+ "destination type %s tuple element %d does not match source type %s tuple element %d",
+ dstTyp.SQLStringForError(), i, srcTyp.SQLStringForError(), i,
+ )
+ }
+ }
+
+ default:
+ if !srcTyp.Identical(dstTyp) {
+ return errors.Newf(
+ "destination type %s does not match source type %s",
+ dstTyp.SQLStringForError(), srcTyp.SQLStringForError(),
)
}
}
+
return nil
}
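
To make the new recursion above easier to follow, here is a minimal, self-contained Go sketch of the same matching rule on a simplified stand-in type. The simpleType and enumMeta names are hypothetical illustrations, not CockroachDB's types.T: enums must agree on both their logical and physical representations, tuples are compared element-wise, and everything else falls back to plain identity.

// Hedged sketch of the checkTypesMatch idea on a simplified type model.
package main

import (
	"bytes"
	"fmt"
	"slices"
)

type enumMeta struct {
	logical  []string // logical representations (labels)
	physical [][]byte // encoded (physical) representations
}

type simpleType struct {
	name  string
	enum  *enumMeta     // non-nil for ENUM types
	tuple []*simpleType // non-empty for tuple types
}

func checkTypesMatch(src, dst *simpleType) error {
	switch {
	case dst.enum != nil:
		if src.enum == nil {
			return fmt.Errorf("destination %s is an ENUM, but source %s is not", dst.name, src.name)
		}
		if !slices.Equal(src.enum.logical, dst.enum.logical) {
			return fmt.Errorf("enums %s and %s have different labels", dst.name, src.name)
		}
		if !slices.EqualFunc(src.enum.physical, dst.enum.physical, bytes.Equal) {
			return fmt.Errorf("enums %s and %s have mismatched physical representations", dst.name, src.name)
		}
	case len(dst.tuple) > 0:
		if len(src.tuple) != len(dst.tuple) {
			return fmt.Errorf("tuples %s and %s have different arity", dst.name, src.name)
		}
		for i := range dst.tuple {
			if err := checkTypesMatch(src.tuple[i], dst.tuple[i]); err != nil {
				return fmt.Errorf("tuple element %d: %w", i, err)
			}
		}
	default:
		// Stand-in for types.T.Identical in the real code.
		if src.name != dst.name {
			return fmt.Errorf("%s does not match %s", dst.name, src.name)
		}
	}
	return nil
}

func main() {
	status := &enumMeta{logical: []string{"open", "closed"}, physical: [][]byte{{0x40}, {0x80}}}
	src := &simpleType{name: "status", enum: status}
	dst := &simpleType{name: "status", enum: status}
	fmt.Println(checkTypesMatch(src, dst)) // <nil>
}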
diff --git a/pkg/sql/conn_executor.go b/pkg/sql/conn_executor.go
index ead2b0e60bc7..f9996e9ce514 100644
--- a/pkg/sql/conn_executor.go
+++ b/pkg/sql/conn_executor.go
@@ -4435,6 +4435,8 @@ type StatementCounters struct {
UpdateCount telemetry.CounterWithMetric
InsertCount telemetry.CounterWithMetric
DeleteCount telemetry.CounterWithMetric
+ // CRUDQueryCount counts all four CRUD statements above (SELECT, UPDATE, INSERT, DELETE).
+ CRUDQueryCount telemetry.CounterWithMetric
// Transaction operations.
TxnBeginCount telemetry.CounterWithMetric
@@ -4499,6 +4501,8 @@ func makeStartedStatementCounters(internal bool) StatementCounters {
getMetricMeta(MetaInsertStarted, internal)),
DeleteCount: telemetry.NewCounterWithMetric(
getMetricMeta(MetaDeleteStarted, internal)),
+ CRUDQueryCount: telemetry.NewCounterWithMetric(
+ getMetricMeta(MetaCRUDStarted, internal)),
DdlCount: telemetry.NewCounterWithMetric(
getMetricMeta(MetaDdlStarted, internal)),
CopyCount: telemetry.NewCounterWithMetric(
@@ -4542,6 +4546,8 @@ func makeExecutedStatementCounters(internal bool) StatementCounters {
getMetricMeta(MetaInsertExecuted, internal)),
DeleteCount: telemetry.NewCounterWithMetric(
getMetricMeta(MetaDeleteExecuted, internal)),
+ CRUDQueryCount: telemetry.NewCounterWithMetric(
+ getMetricMeta(MetaCRUDExecuted, internal)),
DdlCount: telemetry.NewCounterWithMetric(
getMetricMeta(MetaDdlExecuted, internal)),
CopyCount: telemetry.NewCounterWithMetric(
@@ -4562,12 +4568,16 @@ func (sc *StatementCounters) incrementCount(ex *connExecutor, stmt tree.Statemen
sc.TxnBeginCount.Inc()
case *tree.Select:
sc.SelectCount.Inc()
+ sc.CRUDQueryCount.Inc()
case *tree.Update:
sc.UpdateCount.Inc()
+ sc.CRUDQueryCount.Inc()
case *tree.Insert:
sc.InsertCount.Inc()
+ sc.CRUDQueryCount.Inc()
case *tree.Delete:
sc.DeleteCount.Inc()
+ sc.CRUDQueryCount.Inc()
case *tree.CommitTransaction:
sc.TxnCommitCount.Inc()
case *tree.RollbackTransaction:
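
The conn_executor change above fans each CRUD statement into its per-statement counter plus the new aggregate. A small sketch of that fan-out pattern, with a hypothetical counter type standing in for telemetry.CounterWithMetric:

// Sketch only: each CRUD statement bumps its own counter and one shared aggregate.
package main

import "fmt"

type counter struct{ n int64 }

func (c *counter) Inc() { c.n++ }

type statementCounters struct {
	selectCount, insertCount, updateCount, deleteCount counter
	crudQueryCount                                     counter // aggregate of the four above
}

func (sc *statementCounters) increment(stmt string) {
	switch stmt {
	case "SELECT":
		sc.selectCount.Inc()
		sc.crudQueryCount.Inc()
	case "INSERT":
		sc.insertCount.Inc()
		sc.crudQueryCount.Inc()
	case "UPDATE":
		sc.updateCount.Inc()
		sc.crudQueryCount.Inc()
	case "DELETE":
		sc.deleteCount.Inc()
		sc.crudQueryCount.Inc()
	}
	// Non-CRUD statements (e.g. BEGIN) fall through untouched here.
}

func main() {
	var sc statementCounters
	for _, s := range []string{"SELECT", "INSERT", "SELECT", "BEGIN"} {
		sc.increment(s)
	}
	fmt.Println(sc.selectCount.n, sc.crudQueryCount.n) // 2 3
}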
diff --git a/pkg/sql/conn_executor_exec.go b/pkg/sql/conn_executor_exec.go
index e4420bbd8e52..fb604beedc22 100644
--- a/pkg/sql/conn_executor_exec.go
+++ b/pkg/sql/conn_executor_exec.go
@@ -505,7 +505,7 @@ func (ex *connExecutor) execStmtInOpenState(
if notice, err := ex.server.cfg.LicenseEnforcer.MaybeFailIfThrottled(ctx, curOpen); err != nil {
return makeErrEvent(err)
} else if notice != nil {
- res.BufferNotice(notice)
+ p.BufferClientNotice(ctx, notice)
}
}
}
diff --git a/pkg/sql/exec_log.go b/pkg/sql/exec_log.go
index 86f167465613..1ecb31dd94ab 100644
--- a/pkg/sql/exec_log.go
+++ b/pkg/sql/exec_log.go
@@ -359,58 +359,56 @@ func (p *planner) maybeLogStatementInternal(
defer releaseSampledQuery(sampledQuery)
*sampledQuery = eventpb.SampledQuery{
- CommonSQLExecDetails: execDetails,
- SkippedQueries: skippedQueries,
- CostEstimate: p.curPlan.instrumentation.costEstimate,
- Distribution: p.curPlan.instrumentation.distribution.String(),
- PlanGist: p.curPlan.instrumentation.planGist.String(),
- SessionID: p.extendedEvalCtx.SessionID.String(),
- Database: p.CurrentDatabase(),
- StatementID: p.stmt.QueryID.String(),
- TransactionID: txnID,
- StatementFingerprintID: stmtFingerprintID.String(),
- MaxFullScanRowsEstimate: p.curPlan.instrumentation.maxFullScanRows,
- TotalScanRowsEstimate: p.curPlan.instrumentation.totalScanRows,
- OutputRowsEstimate: p.curPlan.instrumentation.outputRows,
- StatsAvailable: p.curPlan.instrumentation.statsAvailable,
- NanosSinceStatsCollected: int64(p.curPlan.instrumentation.nanosSinceStatsCollected),
- BytesRead: p.curPlan.instrumentation.topLevelStats.bytesRead,
- RowsRead: p.curPlan.instrumentation.topLevelStats.rowsRead,
- RowsWritten: p.curPlan.instrumentation.topLevelStats.rowsWritten,
- InnerJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.InnerJoin]),
- LeftOuterJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.LeftOuterJoin]),
- FullOuterJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.FullOuterJoin]),
- SemiJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.LeftSemiJoin]),
- AntiJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.LeftAntiJoin]),
- IntersectAllJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.IntersectAllJoin]),
- ExceptAllJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.ExceptAllJoin]),
- HashJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.HashJoin]),
- CrossJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.CrossJoin]),
- IndexJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.IndexJoin]),
- LookupJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.LookupJoin]),
- MergeJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.MergeJoin]),
- InvertedJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.InvertedJoin]),
- ApplyJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.ApplyJoin]),
- ZigZagJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.ZigZagJoin]),
- ContentionNanos: queryLevelStats.ContentionTime.Nanoseconds(),
- Regions: queryLevelStats.Regions,
- SQLInstanceIDs: queryLevelStats.SQLInstanceIDs,
- KVNodeIDs: queryLevelStats.KVNodeIDs,
- UsedFollowerRead: queryLevelStats.UsedFollowerRead,
- NetworkBytesSent: queryLevelStats.NetworkBytesSent,
- MaxMemUsage: queryLevelStats.MaxMemUsage,
- MaxDiskUsage: queryLevelStats.MaxDiskUsage,
- KVBytesRead: queryLevelStats.KVBytesRead,
- KVPairsRead: queryLevelStats.KVPairsRead,
- KVRowsRead: queryLevelStats.KVRowsRead,
- KvTimeNanos: queryLevelStats.KVTime.Nanoseconds(),
- KvGrpcCalls: queryLevelStats.KVBatchRequestsIssued,
- NetworkMessages: queryLevelStats.NetworkMessages,
- CpuTimeNanos: queryLevelStats.CPUTime.Nanoseconds(),
- IndexRecommendations: indexRecs,
- // TODO(mgartner): Use a slice of struct{uint64, uint64} instead of
- // converting to strings.
- Indexes: p.curPlan.instrumentation.indexesUsed.Strings(),
+ CommonSQLExecDetails: execDetails,
+ SkippedQueries: skippedQueries,
+ CostEstimate: p.curPlan.instrumentation.costEstimate,
+ Distribution: p.curPlan.instrumentation.distribution.String(),
+ PlanGist: p.curPlan.instrumentation.planGist.String(),
+ SessionID: p.extendedEvalCtx.SessionID.String(),
+ Database: p.CurrentDatabase(),
+ StatementID: p.stmt.QueryID.String(),
+ TransactionID: txnID,
+ StatementFingerprintID: stmtFingerprintID.String(),
+ MaxFullScanRowsEstimate: p.curPlan.instrumentation.maxFullScanRows,
+ TotalScanRowsEstimate: p.curPlan.instrumentation.totalScanRows,
+ OutputRowsEstimate: p.curPlan.instrumentation.outputRows,
+ StatsAvailable: p.curPlan.instrumentation.statsAvailable,
+ NanosSinceStatsCollected: int64(p.curPlan.instrumentation.nanosSinceStatsCollected),
+ BytesRead: p.curPlan.instrumentation.topLevelStats.bytesRead,
+ RowsRead: p.curPlan.instrumentation.topLevelStats.rowsRead,
+ RowsWritten: p.curPlan.instrumentation.topLevelStats.rowsWritten,
+ InnerJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.InnerJoin]),
+ LeftOuterJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.LeftOuterJoin]),
+ FullOuterJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.FullOuterJoin]),
+ SemiJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.LeftSemiJoin]),
+ AntiJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.LeftAntiJoin]),
+ IntersectAllJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.IntersectAllJoin]),
+ ExceptAllJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.ExceptAllJoin]),
+ HashJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.HashJoin]),
+ CrossJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.CrossJoin]),
+ IndexJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.IndexJoin]),
+ LookupJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.LookupJoin]),
+ MergeJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.MergeJoin]),
+ InvertedJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.InvertedJoin]),
+ ApplyJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.ApplyJoin]),
+ ZigZagJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.ZigZagJoin]),
+ ContentionNanos: queryLevelStats.ContentionTime.Nanoseconds(),
+ Regions: queryLevelStats.Regions,
+ SQLInstanceIDs: queryLevelStats.SQLInstanceIDs,
+ KVNodeIDs: queryLevelStats.KVNodeIDs,
+ UsedFollowerRead: queryLevelStats.UsedFollowerRead,
+ NetworkBytesSent: queryLevelStats.NetworkBytesSent,
+ MaxMemUsage: queryLevelStats.MaxMemUsage,
+ MaxDiskUsage: queryLevelStats.MaxDiskUsage,
+ KVBytesRead: queryLevelStats.KVBytesRead,
+ KVPairsRead: queryLevelStats.KVPairsRead,
+ KVRowsRead: queryLevelStats.KVRowsRead,
+ KvTimeNanos: queryLevelStats.KVTime.Nanoseconds(),
+ KvGrpcCalls: queryLevelStats.KVBatchRequestsIssued,
+ NetworkMessages: queryLevelStats.NetworkMessages,
+ CpuTimeNanos: queryLevelStats.CPUTime.Nanoseconds(),
+ IndexRecommendations: indexRecs,
+ Indexes: p.curPlan.instrumentation.indexesUsed,
ScanCount: int64(p.curPlan.instrumentation.scanCounts[exec.ScanCount]),
ScanWithStatsCount: int64(p.curPlan.instrumentation.scanCounts[exec.ScanWithStatsCount]),
ScanWithStatsForecastCount: int64(p.curPlan.instrumentation.scanCounts[exec.ScanWithStatsForecastCount]),
diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go
index 55ce0f7652f3..057cf70cd005 100644
--- a/pkg/sql/exec_util.go
+++ b/pkg/sql/exec_util.go
@@ -833,7 +833,7 @@ var (
// Below are the metadata for the statement started counters.
MetaQueryStarted = metric.Metadata{
Name: "sql.query.started.count",
- Help: "Number of SQL queries started",
+ Help: "Number of SQL operations started including queries, and transaction control statements",
Measurement: "SQL Statements",
Unit: metric.Unit_COUNT,
}
@@ -885,6 +885,12 @@ var (
Measurement: "SQL Statements",
Unit: metric.Unit_COUNT,
}
+ MetaCRUDStarted = metric.Metadata{
+ Name: "sql.crud_query.started.count",
+ Help: "Number of SQL SELECT, INSERT, UPDATE, DELETE statements started",
+ Measurement: "SQL Statements",
+ Unit: metric.Unit_COUNT,
+ }
MetaSavepointStarted = metric.Metadata{
Name: "sql.savepoint.started.count",
Help: "Number of SQL SAVEPOINT statements started",
@@ -949,7 +955,7 @@ var (
// Below are the metadata for the statement executed counters.
MetaQueryExecuted = metric.Metadata{
Name: "sql.query.count",
- Help: "Number of SQL queries executed",
+ Help: "Number of SQL operations started including queries, and transaction control statements",
Measurement: "SQL Statements",
Unit: metric.Unit_COUNT,
}
@@ -1001,6 +1007,12 @@ var (
Measurement: "SQL Statements",
Unit: metric.Unit_COUNT,
}
+ MetaCRUDExecuted = metric.Metadata{
+ Name: "sql.crud_query.count",
+ Help: "Number of SQL SELECT, INSERT, UPDATE, DELETE statements successfully executed",
+ Measurement: "SQL Statements",
+ Unit: metric.Unit_COUNT,
+ }
MetaSavepointExecuted = metric.Metadata{
Name: "sql.savepoint.count",
Help: "Number of SQL SAVEPOINT statements successfully executed",
diff --git a/pkg/sql/executor_statement_metrics.go b/pkg/sql/executor_statement_metrics.go
index d2d7d302ea81..5056ec6559be 100644
--- a/pkg/sql/executor_statement_metrics.go
+++ b/pkg/sql/executor_statement_metrics.go
@@ -208,10 +208,8 @@ func (ex *connExecutor) recordStatementSummary(
EndTime: phaseTimes.GetSessionPhaseTime(sessionphase.PlannerStartExecStmt).Add(svcLatRaw),
FullScan: fullScan,
ExecStats: queryLevelStats,
- // TODO(mgartner): Use a slice of struct{uint64, uint64} instead of
- // converting to strings.
- Indexes: planner.instrumentation.indexesUsed.Strings(),
- Database: planner.SessionData().Database,
+ Indexes: planner.instrumentation.indexesUsed,
+ Database: planner.SessionData().Database,
}
stmtFingerprintID, err :=
diff --git a/pkg/sql/importer/BUILD.bazel b/pkg/sql/importer/BUILD.bazel
index 0653253b0609..f3c798eaeb59 100644
--- a/pkg/sql/importer/BUILD.bazel
+++ b/pkg/sql/importer/BUILD.bazel
@@ -17,7 +17,6 @@ go_library(
"import_processor.go",
"import_processor_planning.go",
"import_table_creation.go",
- "import_type_resolver.go",
"read_import_avro.go",
"read_import_base.go",
"read_import_csv.go",
@@ -32,6 +31,7 @@ go_library(
visibility = ["//visibility:public"],
deps = [
"//pkg/base",
+ "//pkg/ccl/crosscluster",
"//pkg/cloud",
"//pkg/cloud/cloudprivilege",
"//pkg/clusterversion",
@@ -93,7 +93,6 @@ go_library(
"//pkg/sql/sem/tree",
"//pkg/sql/sessiondata",
"//pkg/sql/sqlclustersettings",
- "//pkg/sql/sqlerrors",
"//pkg/sql/sqltelemetry",
"//pkg/sql/stats",
"//pkg/sql/types",
diff --git a/pkg/sql/importer/read_import_base.go b/pkg/sql/importer/read_import_base.go
index 2065079cb29f..963323a86d91 100644
--- a/pkg/sql/importer/read_import_base.go
+++ b/pkg/sql/importer/read_import_base.go
@@ -18,6 +18,7 @@ import (
"sync/atomic"
"time"
+ "github.com/cockroachdb/cockroach/pkg/ccl/crosscluster"
"github.com/cockroachdb/cockroach/pkg/cloud"
"github.com/cockroachdb/cockroach/pkg/kv"
"github.com/cockroachdb/cockroach/pkg/kv/kvpb"
@@ -55,7 +56,7 @@ func runImport(
// Install type metadata in all of the import tables.
spec = protoutil.Clone(spec).(*execinfrapb.ReadImportDataSpec)
- importResolver := MakeImportTypeResolver(spec.Types)
+ importResolver := crosscluster.MakeCrossClusterTypeResolver(spec.Types)
for _, table := range spec.Tables {
cpy := tabledesc.NewBuilder(table.Desc).BuildCreatedMutableTable()
if err := typedesc.HydrateTypesInDescriptor(ctx, cpy, importResolver); err != nil {
diff --git a/pkg/sql/instrumentation.go b/pkg/sql/instrumentation.go
index 4f8eaca6af36..e6c76d858f1a 100644
--- a/pkg/sql/instrumentation.go
+++ b/pkg/sql/instrumentation.go
@@ -26,7 +26,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/idxrecommendations"
"github.com/cockroachdb/cockroach/pkg/sql/isql"
"github.com/cockroachdb/cockroach/pkg/sql/opt/exec"
- "github.com/cockroachdb/cockroach/pkg/sql/opt/exec/execbuilder"
"github.com/cockroachdb/cockroach/pkg/sql/opt/exec/explain"
"github.com/cockroachdb/cockroach/pkg/sql/opt/indexrec"
"github.com/cockroachdb/cockroach/pkg/sql/opt/optbuilder"
@@ -226,7 +225,7 @@ type instrumentationHelper struct {
scanCounts [exec.NumScanCountTypes]int
// indexesUsed list the indexes used in the query with format tableID@indexID.
- indexesUsed execbuilder.IndexesUsed
+ indexesUsed []string
// schemachangerMode indicates which schema changer mode was used to execute
// the query.
diff --git a/pkg/sql/logictest/REPOSITORIES.bzl b/pkg/sql/logictest/REPOSITORIES.bzl
index 723c82b35307..df19ff800d02 100644
--- a/pkg/sql/logictest/REPOSITORIES.bzl
+++ b/pkg/sql/logictest/REPOSITORIES.bzl
@@ -7,17 +7,17 @@ CONFIG_DARWIN_AMD64 = "darwin-10.9-amd64"
CONFIG_DARWIN_ARM64 = "darwin-11.0-arm64"
_CONFIGS = [
- ("24.1.5", [
- (CONFIG_DARWIN_AMD64, "b6aba8395510ac2506c6cb82e2661d6d3476ff7c132016fdc823b165cbea3549"),
- (CONFIG_DARWIN_ARM64, "7b2cc8e3a53945d97bc5afd4b7457ff4962633bae9b71945ffd6e2659fa2bf5a"),
- (CONFIG_LINUX_AMD64, "731f9ade47b19119136049816edd12167423cb993ee19349fa6ce51157b9fbfc"),
- (CONFIG_LINUX_ARM64, "7ed4d67c60f1b54ed522fbdecfb4907904be6e043df6e6596bfb2894e7d82f87"),
+ ("24.1.6", [
+ (CONFIG_DARWIN_AMD64, "0d900af86357f5883ce10935bae7ea00e16ffc2d7875e56491e6d731f4565d9d"),
+ (CONFIG_DARWIN_ARM64, "985e67e66bc29955f1547f7cc0748db5532ab0c57628bdf1ce3df3c9c1fc072a"),
+ (CONFIG_LINUX_AMD64, "1120fae532f5e31411d8df06c9dac337b8116f1b167988ec2da675770c65a329"),
+ (CONFIG_LINUX_ARM64, "9d913a9080bc777645aa8a6c009f717f500856f8b3b740d5bd9e8918ddd0d88a"),
]),
- ("24.2.3", [
- (CONFIG_DARWIN_AMD64, "f3d59ed7367c8b4d8420bd1cae9f50a58114d18945ef984805403d44943447d0"),
- (CONFIG_DARWIN_ARM64, "5e70e89ef21217a80a532499f5b07618269f1ad1399732d4a55c09a71554f048"),
- (CONFIG_LINUX_AMD64, "637d0ada1db52e57f5cbbe19a7defcff0d538d43b771ae8da7ceba326686d64c"),
- (CONFIG_LINUX_ARM64, "2892c8d34e89909b871baf9c1b147c827f3b3b78285602aac33789f79fdfa210"),
+ ("24.2.4", [
+ (CONFIG_DARWIN_AMD64, "84f7dc8d5b38acb2bcf61005e1eef658a640ff4da107ef4ea9bf8feda36b3bb3"),
+ (CONFIG_DARWIN_ARM64, "c65aa4cefe1006cec67305350d69cf5c536a13e0aec06e2b508cae0578bca421"),
+ (CONFIG_LINUX_AMD64, "fa4a5696f0abd766993d5ded9b6c2cc899701be56afbf6d737baf4841c9e7bc1"),
+ (CONFIG_LINUX_ARM64, "66291ab21b9e94edf1d2c594ddbdf1ceea1c3270525982cff9be5ac1544c0281"),
]),
]
diff --git a/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_schema b/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_schema
index f4f6ed67e2af..5df8fb612911 100644
--- a/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_schema
+++ b/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_schema
@@ -125,10 +125,6 @@ user root
statement ok
USE d
-# root must be a member of testuser to ALTER DEFAULT PRIVILEGES FOR ROLE testuser.
-statement ok
-GRANT testuser TO root
-
statement ok
ALTER DEFAULT PRIVILEGES FOR ROLE testuser REVOKE ALL ON SCHEMAS FROM testuser, testuser2
diff --git a/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_sequence b/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_sequence
index ec5bfb44bfca..78e876b93d37 100644
--- a/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_sequence
+++ b/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_sequence
@@ -140,9 +140,6 @@ user root
statement ok
USE d
-statement ok
-GRANT testuser TO root
-
statement ok
ALTER DEFAULT PRIVILEGES FOR ROLE testuser REVOKE ALL ON SEQUENCES FROM testuser, testuser2
diff --git a/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_table b/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_table
index 891c60655c40..1a82f70e6c4a 100644
--- a/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_table
+++ b/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_table
@@ -185,9 +185,6 @@ use d
statement ok
GRANT CREATE ON DATABASE d TO testuser
-statement ok
-GRANT testuser TO root
-
statement ok
ALTER DEFAULT PRIVILEGES FOR ROLE testuser GRANT SELECT ON TABLES to testuser, testuser2
@@ -288,7 +285,7 @@ user testuser2
statement ok
USE d
-statement error pq: must be a member of root
+statement error pq: must be an admin or member of root
ALTER DEFAULT PRIVILEGES FOR ROLE root GRANT SELECT ON TABLES TO testuser
# Ensure you can ALTER DEFAULT PRIVILEGES for multiple roles.
@@ -365,3 +362,22 @@ ALTER DEFAULT PRIVILEGES FOR ROLE public REVOKE SELECT ON TABLES FROM testuser2,
# Can specify PUBLIC as a grantee.
statement ok
ALTER DEFAULT PRIVILEGES REVOKE SELECT ON TABLES FROM public
+
+# Admins can ALTER DEFAULT PRIVILEGES for any role.
+user root
+
+# Confirm that root is not a member of testuser. We avoid using pg_has_role
+# to check, since that has a special case for all admin users.
+query TTB
+SELECT role, inheriting_member, member_is_explicit
+FROM crdb_internal.kv_inherited_role_members
+WHERE inheriting_member = 'root'
+ORDER BY role
+----
+admin root true
+
+statement ok
+ALTER DEFAULT PRIVILEGES FOR ROLE testuser GRANT ALL ON TABLES TO testuser2
+
+statement ok
+ALTER DEFAULT PRIVILEGES FOR ROLE testuser REVOKE ALL ON TABLES FROM testuser2
diff --git a/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_type b/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_type
index 3f7dc6982703..dd3062f0ee9f 100644
--- a/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_type
+++ b/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_type
@@ -115,9 +115,6 @@ user root
statement ok
USE d
-statement ok
-GRANT testuser TO root
-
statement ok
ALTER DEFAULT PRIVILEGES FOR ROLE testuser REVOKE ALL ON TYPES FROM testuser, testuser2
diff --git a/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_in_schema b/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_in_schema
index 4bfbdd1038b9..22d74fd6a92a 100644
--- a/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_in_schema
+++ b/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_in_schema
@@ -12,9 +12,6 @@ CREATE USER testuser2
statement ok
GRANT CREATE ON DATABASE test TO testuser
-statement ok
-GRANT testuser TO root
-
user testuser
# Test on public schema.
diff --git a/pkg/sql/logictest/testdata/logic_test/crdb_internal_catalog b/pkg/sql/logictest/testdata/logic_test/crdb_internal_catalog
index ab1c65f9b63e..5cdf428a8692 100644
--- a/pkg/sql/logictest/testdata/logic_test/crdb_internal_catalog
+++ b/pkg/sql/logictest/testdata/logic_test/crdb_internal_catalog
@@ -105,7 +105,7 @@ skipif config local-mixed-24.2
query IT
SELECT id, strip_volatile(descriptor) FROM crdb_internal.kv_catalog_descriptor ORDER BY id
----
-1 {"database": {"id": 1, "name": "system", "privileges": {"ownerProto": "node", "users": [{"privileges": "2048", "userProto": "admin", "withGrantOption": "2048"}, {"privileges": "2048", "userProto": "root", "withGrantOption": "2048"}], "version": 3}, "systemDatabaseSchemaVersion": {"internal": 22, "majorVal": 1000024, "minorVal": 2}, "version": "1"}}
+1 {"database": {"id": 1, "name": "system", "privileges": {"ownerProto": "node", "users": [{"privileges": "2048", "userProto": "admin", "withGrantOption": "2048"}, {"privileges": "2048", "userProto": "root", "withGrantOption": "2048"}], "version": 3}, "systemDatabaseSchemaVersion": {"internal": 22, "majorVal": 24, "minorVal": 2}, "version": "1"}}
3 {"table": {"columns": [{"id": 1, "name": "id", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 2, "name": "descriptor", "nullable": true, "type": {"family": "BytesFamily", "oid": 17}}], "formatVersion": 3, "id": 3, "name": "descriptor", "nextColumnId": 3, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "parentId": 1, "primaryIndex": {"constraintId": 1, "encodingType": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "keyColumnDirections": ["ASC"], "keyColumnIds": [1], "keyColumnNames": ["id"], "name": "primary", "partitioning": {}, "sharded": {}, "storeColumnIds": [2], "storeColumnNames": ["descriptor"], "unique": true, "version": 4}, "privileges": {"ownerProto": "node", "users": [{"privileges": "32", "userProto": "admin", "withGrantOption": "32"}, {"privileges": "32", "userProto": "root", "withGrantOption": "32"}], "version": 3}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 29, "version": "1"}}
4 {"table": {"columns": [{"id": 1, "name": "username", "type": {"family": "StringFamily", "oid": 25}}, {"id": 2, "name": "hashedPassword", "nullable": true, "type": {"family": "BytesFamily", "oid": 17}}, {"defaultExpr": "false", "id": 3, "name": "isRole", "type": {"oid": 16}}, {"id": 4, "name": "user_id", "type": {"family": "OidFamily", "oid": 26}}], "formatVersion": 3, "id": 4, "indexes": [{"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 2, "interleave": {}, "keyColumnDirections": ["ASC"], "keyColumnIds": [4], "keyColumnNames": ["user_id"], "keySuffixColumnIds": [1], "name": "users_user_id_idx", "partitioning": {}, "sharded": {}, "unique": true, "version": 3}], "name": "users", "nextColumnId": 5, "nextConstraintId": 3, "nextIndexId": 3, "nextMutationId": 1, "parentId": 1, "primaryIndex": {"constraintId": 2, "encodingType": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "keyColumnDirections": ["ASC"], "keyColumnIds": [1], "keyColumnNames": ["username"], "name": "primary", "partitioning": {}, "sharded": {}, "storeColumnIds": [2, 3, 4], "storeColumnNames": ["hashedPassword", "isRole", "user_id"], "unique": true, "version": 4}, "privileges": {"ownerProto": "node", "users": [{"privileges": "480", "userProto": "admin", "withGrantOption": "480"}, {"privileges": "480", "userProto": "root", "withGrantOption": "480"}], "version": 3}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 29, "version": "2"}}
5 {"table": {"columns": [{"id": 1, "name": "id", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 2, "name": "config", "nullable": true, "type": {"family": "BytesFamily", "oid": 17}}], "formatVersion": 3, "id": 5, "name": "zones", "nextColumnId": 3, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "parentId": 1, "primaryIndex": {"constraintId": 1, "encodingType": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "keyColumnDirections": ["ASC"], "keyColumnIds": [1], "keyColumnNames": ["id"], "name": "primary", "partitioning": {}, "sharded": {}, "storeColumnIds": [2], "storeColumnNames": ["config"], "unique": true, "version": 4}, "privileges": {"ownerProto": "node", "users": [{"privileges": "480", "userProto": "admin", "withGrantOption": "480"}, {"privileges": "480", "userProto": "root", "withGrantOption": "480"}], "version": 3}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 29, "version": "1"}}
diff --git a/pkg/sql/logictest/testdata/logic_test/reassign_owned_by b/pkg/sql/logictest/testdata/logic_test/reassign_owned_by
index 0646f2101caf..4e8756a81530 100644
--- a/pkg/sql/logictest/testdata/logic_test/reassign_owned_by
+++ b/pkg/sql/logictest/testdata/logic_test/reassign_owned_by
@@ -243,8 +243,19 @@ user root
statement ok
REVOKE CREATE ON DATABASE test FROM testuser, testuser2;
DROP ROLE testuser;
+
+# Ownership of the public schema was transferred to testuser2.
+
+statement error role testuser2 cannot be dropped because some objects depend on it\nowner of schema test.public
+DROP ROLE testuser2
+
+statement ok
+REASSIGN OWNED BY testuser2 TO root
+
+statement ok
DROP ROLE testuser2
+
# ------------------------------------------------------------------------------
# Make sure only objects in the current database are reassigned
diff --git a/pkg/sql/logictest/testdata/logic_test/show_default_privileges b/pkg/sql/logictest/testdata/logic_test/show_default_privileges
index 2b51a2d9072e..d65c2e22001b 100644
--- a/pkg/sql/logictest/testdata/logic_test/show_default_privileges
+++ b/pkg/sql/logictest/testdata/logic_test/show_default_privileges
@@ -226,7 +226,6 @@ use test2;
CREATE USER testuser2;
statement ok
-GRANT testuser TO root;
ALTER DEFAULT PRIVILEGES FOR ROLE testuser GRANT DROP, ZONECONFIG ON TABLES TO foo WITH GRANT OPTION;
query TBTTTB colnames,rowsort
diff --git a/pkg/sql/opt/exec/execbuilder/BUILD.bazel b/pkg/sql/opt/exec/execbuilder/BUILD.bazel
index b06cb525c606..ad8b1921d6ed 100644
--- a/pkg/sql/opt/exec/execbuilder/BUILD.bazel
+++ b/pkg/sql/opt/exec/execbuilder/BUILD.bazel
@@ -47,6 +47,7 @@ go_library(
"//pkg/sql/sqlerrors",
"//pkg/sql/sqltelemetry",
"//pkg/sql/types",
+ "//pkg/util",
"//pkg/util/buildutil",
"//pkg/util/encoding",
"//pkg/util/errorutil",
diff --git a/pkg/sql/opt/exec/execbuilder/builder.go b/pkg/sql/opt/exec/execbuilder/builder.go
index c529e9ea3f81..34830973e177 100644
--- a/pkg/sql/opt/exec/execbuilder/builder.go
+++ b/pkg/sql/opt/exec/execbuilder/builder.go
@@ -7,8 +7,6 @@ package execbuilder
import (
"context"
- "slices"
- "strconv"
"time"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
@@ -170,41 +168,7 @@ type Builder struct {
IsANSIDML bool
// IndexesUsed list the indexes used in query with the format tableID@indexID.
- IndexesUsed
-}
-
-// IndexesUsed is a list of indexes used in a query.
-type IndexesUsed struct {
- indexes []struct {
- tableID cat.StableID
- indexID cat.StableID
- }
-}
-
-// add adds the given index to the list, if it is not already present.
-func (iu *IndexesUsed) add(tableID, indexID cat.StableID) {
- s := struct {
- tableID cat.StableID
- indexID cat.StableID
- }{tableID, indexID}
- if !slices.Contains(iu.indexes, s) {
- iu.indexes = append(iu.indexes, s)
- }
-}
-
-// Strings returns a slice of strings with the format tableID@indexID for each
-// index in the list.
-//
-// TODO(mgartner): Use a slice of struct{uint64, uint64} instead of converting
-// to strings.
-func (iu *IndexesUsed) Strings() []string {
- res := make([]string, len(iu.indexes))
- const base = 10
- for i, u := range iu.indexes {
- res[i] = strconv.FormatUint(uint64(u.tableID), base) + "@" +
- strconv.FormatUint(uint64(u.indexID), base)
- }
- return res
+ IndexesUsed []string
}
// New constructs an instance of the execution node builder using the
diff --git a/pkg/sql/opt/exec/execbuilder/relational.go b/pkg/sql/opt/exec/execbuilder/relational.go
index 8a70b344376e..ecdc5b305c1a 100644
--- a/pkg/sql/opt/exec/execbuilder/relational.go
+++ b/pkg/sql/opt/exec/execbuilder/relational.go
@@ -38,6 +38,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/sqlerrors"
"github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry"
"github.com/cockroachdb/cockroach/pkg/sql/types"
+ "github.com/cockroachdb/cockroach/pkg/util"
"github.com/cockroachdb/cockroach/pkg/util/buildutil"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
"github.com/cockroachdb/cockroach/pkg/util/errorutil"
@@ -755,7 +756,7 @@ func (b *Builder) buildScan(scan *memo.ScanExpr) (_ execPlan, outputCols colOrdM
return execPlan{}, colOrdMap{},
errors.AssertionFailedf("expected inverted index scan to have a constraint")
}
- b.IndexesUsed.add(tab.ID(), idx.ID())
+ b.IndexesUsed = util.CombineUnique(b.IndexesUsed, []string{fmt.Sprintf("%d@%d", tab.ID(), idx.ID())})
// Save if we planned a full (large) table/index scan on the builder so that
// the planner can be made aware later. We only do this for non-virtual
@@ -2296,7 +2297,7 @@ func (b *Builder) buildIndexJoin(
// TODO(radu): the distsql implementation of index join assumes that the input
// starts with the PK columns in order (#40749).
pri := tab.Index(cat.PrimaryIndex)
- b.IndexesUsed.add(tab.ID(), pri.ID())
+ b.IndexesUsed = util.CombineUnique(b.IndexesUsed, []string{fmt.Sprintf("%d@%d", tab.ID(), pri.ID())})
keyCols := make([]exec.NodeColumnOrdinal, pri.KeyColumnCount())
for i := range keyCols {
keyCols[i], err = getNodeColumnOrdinal(inputCols, join.Table.ColumnID(pri.Column(i).Ordinal()))
@@ -2674,7 +2675,7 @@ func (b *Builder) buildLookupJoin(
tab := md.Table(join.Table)
idx := tab.Index(join.Index)
- b.IndexesUsed.add(tab.ID(), idx.ID())
+ b.IndexesUsed = util.CombineUnique(b.IndexesUsed, []string{fmt.Sprintf("%d@%d", tab.ID(), idx.ID())})
locking, err := b.buildLocking(join.Table, join.Locking)
if err != nil {
@@ -2854,7 +2855,7 @@ func (b *Builder) buildInvertedJoin(
md := b.mem.Metadata()
tab := md.Table(join.Table)
idx := tab.Index(join.Index)
- b.IndexesUsed.add(tab.ID(), idx.ID())
+ b.IndexesUsed = util.CombineUnique(b.IndexesUsed, []string{fmt.Sprintf("%d@%d", tab.ID(), idx.ID())})
prefixEqCols := make([]exec.NodeColumnOrdinal, len(join.PrefixKeyCols))
for i, c := range join.PrefixKeyCols {
@@ -2996,8 +2997,10 @@ func (b *Builder) buildZigzagJoin(
rightTable := md.Table(join.RightTable)
leftIndex := leftTable.Index(join.LeftIndex)
rightIndex := rightTable.Index(join.RightIndex)
- b.IndexesUsed.add(leftTable.ID(), leftIndex.ID())
- b.IndexesUsed.add(rightTable.ID(), rightIndex.ID())
+ b.IndexesUsed = util.CombineUnique(b.IndexesUsed,
+ []string{fmt.Sprintf("%d@%d", leftTable.ID(), leftIndex.ID())})
+ b.IndexesUsed = util.CombineUnique(b.IndexesUsed,
+ []string{fmt.Sprintf("%d@%d", rightTable.ID(), rightIndex.ID())})
leftEqCols := make([]exec.TableColumnOrdinal, len(join.LeftEqCols))
rightEqCols := make([]exec.TableColumnOrdinal, len(join.RightEqCols))
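
With the IndexesUsed struct gone, index usage is now tracked as deduplicated "tableID@indexID" strings. The sketch below approximates that bookkeeping; combineUnique is a hypothetical stand-in for util.CombineUnique and assumes append-if-absent semantics:

// Sketch of the deduplicated "tableID@indexID" bookkeeping used above.
package main

import (
	"fmt"
	"slices"
)

// combineUnique appends elements of src that are not already in dst.
// (Assumed semantics; not the real util.CombineUnique implementation.)
func combineUnique[T comparable](dst, src []T) []T {
	for _, v := range src {
		if !slices.Contains(dst, v) {
			dst = append(dst, v)
		}
	}
	return dst
}

type builder struct {
	indexesUsed []string
}

func (b *builder) recordIndex(tableID, indexID uint64) {
	b.indexesUsed = combineUnique(b.indexesUsed, []string{fmt.Sprintf("%d@%d", tableID, indexID)})
}

func main() {
	var b builder
	b.recordIndex(104, 1)
	b.recordIndex(104, 1) // duplicate, ignored
	b.recordIndex(104, 2)
	fmt.Println(b.indexesUsed) // [104@1 104@2]
}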
diff --git a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index
index 755a2b10e0bd..88e158ccdbfb 100644
--- a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index
+++ b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index
@@ -2681,55 +2681,58 @@ ALTER TABLE d INJECT STATISTICS '[
}
]';
-# Filter with a fully-specified array. This should use a minimal inverted index
-# scan.
+# Filter with a fully-specified array. This should use a zigzag join.
query T
EXPLAIN SELECT a FROM d WHERE b @> '[1, 2]' ORDER BY a
----
distribution: local
vectorized: true
·
-• filter
+• sort
│ estimated row count: 1,247
-│ filter: b @> '[1, 2]'
+│ order: +a
│
-└── • index join
- │ estimated row count: 1,020
+└── • lookup join
+ │ estimated row count: 1,247
│ table: d@d_pkey
+ │ equality: (a) = (a)
+ │ equality cols are key
│
- └── • sort
- │ estimated row count: 1,020
- │ order: +a
- │
- └── • scan
- estimated row count: 1,020 (1.0% of the table; stats collected ago)
- table: d@foo_inv
- spans: 1 span
+ └── • zigzag join
+ estimated row count: 1,247
+ left table: d@foo_inv
+ left columns: (a, b_inverted_key)
+ left fixed values: 1 column
+ right table: d@foo_inv
+ right columns: (a, b_inverted_key)
+ right fixed values: 1 column
# Combine predicates with AND. Should have the same output as b @> '[1, 2]'.
-# This should use a minimal inverted index scan.
+# This should use a zigzag join.
query T
EXPLAIN SELECT a FROM d WHERE b @> '[1]' AND b @> '[2]' ORDER BY a
----
distribution: local
vectorized: true
·
-• filter
+• sort
│ estimated row count: 1,247
-│ filter: (b @> '[1]') AND (b @> '[2]')
+│ order: +a
│
-└── • index join
- │ estimated row count: 1,020
+└── • lookup join
+ │ estimated row count: 1,247
│ table: d@d_pkey
+ │ equality: (a) = (a)
+ │ equality cols are key
│
- └── • sort
- │ estimated row count: 1,020
- │ order: +a
- │
- └── • scan
- estimated row count: 1,020 (1.0% of the table; stats collected ago)
- table: d@foo_inv
- spans: 1 span
+ └── • zigzag join
+ estimated row count: 1,247
+ left table: d@foo_inv
+ left columns: (a, b_inverted_key)
+ left fixed values: 1 column
+ right table: d@foo_inv
+ right columns: (a, b_inverted_key)
+ right fixed values: 1 column
# Filter with a nested array. This index expression is not tight.
# This should use a zigzag join.
diff --git a/pkg/sql/opt/memo/testdata/stats/inverted-array b/pkg/sql/opt/memo/testdata/stats/inverted-array
index 86d81db43fb9..680a6dcbec4f 100644
--- a/pkg/sql/opt/memo/testdata/stats/inverted-array
+++ b/pkg/sql/opt/memo/testdata/stats/inverted-array
@@ -6,10 +6,10 @@ CREATE TABLE t (
)
----
-# Histogram boundaries are for arrays with values 1, 2, and 3, including some
-# empty arrays. The row_count is lower than the sum of the histogram buckets
-# num_eq's because some rows can have multiple inverted index entries, for
-# example `{1, 2}`. There are:
+# Histogram boundaries are for JSON values `{}`, `{1}`, `{2}`, `{3}`. The
+# row_count is lower than the sum of the histogram buckets num_eq's because some
+# rows can have multiple inverted index entries, for example `{1, 2}`. There
+# are:
#
# - 1000 rows total
# - 10 empty arrays
diff --git a/pkg/sql/opt/memo/testdata/stats/inverted-json b/pkg/sql/opt/memo/testdata/stats/inverted-json
index 257ea6dda3f0..758827c11ad5 100644
--- a/pkg/sql/opt/memo/testdata/stats/inverted-json
+++ b/pkg/sql/opt/memo/testdata/stats/inverted-json
@@ -747,13 +747,28 @@ select
│ ├── stats: [rows=4e-07]
│ ├── key: (1)
│ ├── fd: (1)-->(2)
- │ └── scan t@j_idx,inverted
+ │ └── inverted-filter
│ ├── columns: k:1(int!null)
- │ ├── inverted constraint: /5/1
- │ │ └── spans: ["a"/"b"/"c", "a"/"b"/"c"]
- │ ├── stats: [rows=4e-07, distinct(5)=4e-07, null(5)=0]
- │ │ histogram(5)=
- │ └── key: (1)
+ │ ├── inverted expression: /5
+ │ │ ├── tight: false, unique: true
+ │ │ ├── union spans: empty
+ │ │ └── INTERSECTION
+ │ │ ├── span expression
+ │ │ │ ├── tight: true, unique: true
+ │ │ │ └── union spans: ["a"/"b"/"c", "a"/"b"/"c"]
+ │ │ └── span expression
+ │ │ ├── tight: true, unique: true
+ │ │ └── union spans: ["a"/"d"/"e", "a"/"d"/"e"]
+ │ ├── stats: [rows=4e-07]
+ │ ├── key: (1)
+ │ └── scan t@j_idx,inverted
+ │ ├── columns: k:1(int!null) j_inverted_key:5(encodedkey!null)
+ │ ├── inverted constraint: /5/1
+ │ │ └── spans
+ │ │ ├── ["a"/"b"/"c", "a"/"b"/"c"]
+ │ │ └── ["a"/"d"/"e", "a"/"d"/"e"]
+ │ └── stats: [rows=4e-07, distinct(1)=4e-07, null(1)=0, distinct(5)=4e-07, null(5)=0]
+ │ histogram(5)=
└── filters
└── (j:2->'a') = '{"b": "c", "d": "e"}' [type=bool, outer=(2), immutable]
@@ -773,13 +788,44 @@ select
│ ├── stats: [rows=4e-07]
│ ├── key: (1)
│ ├── fd: (1)-->(2)
- │ └── scan t@j_idx,inverted
+ │ └── inverted-filter
│ ├── columns: k:1(int!null)
- │ ├── inverted constraint: /5/1
- │ │ └── spans: ["a"/Arr/"b", "a"/Arr/"b"]
- │ ├── stats: [rows=4e-07, distinct(5)=4e-07, null(5)=0]
- │ │ histogram(5)=
- │ └── key: (1)
+ │ ├── inverted expression: /5
+ │ │ ├── tight: false, unique: true
+ │ │ ├── union spans: empty
+ │ │ └── INTERSECTION
+ │ │ ├── span expression
+ │ │ │ ├── tight: true, unique: true
+ │ │ │ ├── union spans: empty
+ │ │ │ └── INTERSECTION
+ │ │ │ ├── span expression
+ │ │ │ │ ├── tight: true, unique: true
+ │ │ │ │ ├── union spans: empty
+ │ │ │ │ └── INTERSECTION
+ │ │ │ │ ├── span expression
+ │ │ │ │ │ ├── tight: true, unique: true
+ │ │ │ │ │ └── union spans: ["a"/Arr/"b", "a"/Arr/"b"]
+ │ │ │ │ └── span expression
+ │ │ │ │ ├── tight: true, unique: true
+ │ │ │ │ └── union spans: ["a"/Arr/"c", "a"/Arr/"c"]
+ │ │ │ └── span expression
+ │ │ │ ├── tight: true, unique: true
+ │ │ │ └── union spans: ["a"/Arr/"d", "a"/Arr/"d"]
+ │ │ └── span expression
+ │ │ ├── tight: true, unique: true
+ │ │ └── union spans: ["a"/Arr/"e", "a"/Arr/"e"]
+ │ ├── stats: [rows=4e-07]
+ │ ├── key: (1)
+ │ └── scan t@j_idx,inverted
+ │ ├── columns: k:1(int!null) j_inverted_key:5(encodedkey!null)
+ │ ├── inverted constraint: /5/1
+ │ │ └── spans
+ │ │ ├── ["a"/Arr/"b", "a"/Arr/"b"]
+ │ │ ├── ["a"/Arr/"c", "a"/Arr/"c"]
+ │ │ ├── ["a"/Arr/"d", "a"/Arr/"d"]
+ │ │ └── ["a"/Arr/"e", "a"/Arr/"e"]
+ │ └── stats: [rows=4e-07, distinct(1)=4e-07, null(1)=0, distinct(5)=4e-07, null(5)=0]
+ │ histogram(5)=
└── filters
└── (j:2->'a') = '["b", "c", "d", "e"]' [type=bool, outer=(2), immutable]
@@ -800,13 +846,36 @@ select
│ ├── stats: [rows=4e-07]
│ ├── key: (1)
│ ├── fd: (1)-->(2)
- │ └── scan t@j_idx,inverted
+ │ └── inverted-filter
│ ├── columns: k:1(int!null)
- │ ├── inverted constraint: /5/1
- │ │ └── spans: ["a"/"b"/Arr/"c", "a"/"b"/Arr/"c"]
- │ ├── stats: [rows=4e-07, distinct(5)=4e-07, null(5)=0]
- │ │ histogram(5)=
- │ └── key: (1)
+ │ ├── inverted expression: /5
+ │ │ ├── tight: false, unique: true
+ │ │ ├── union spans: empty
+ │ │ └── INTERSECTION
+ │ │ ├── span expression
+ │ │ │ ├── tight: true, unique: true
+ │ │ │ ├── union spans: empty
+ │ │ │ └── INTERSECTION
+ │ │ │ ├── span expression
+ │ │ │ │ ├── tight: true, unique: true
+ │ │ │ │ └── union spans: ["a"/"b"/Arr/"c", "a"/"b"/Arr/"c"]
+ │ │ │ └── span expression
+ │ │ │ ├── tight: true, unique: true
+ │ │ │ └── union spans: ["a"/"b"/Arr/"d", "a"/"b"/Arr/"d"]
+ │ │ └── span expression
+ │ │ ├── tight: true, unique: true
+ │ │ └── union spans: ["a"/"b"/Arr/"e", "a"/"b"/Arr/"e"]
+ │ ├── stats: [rows=4e-07]
+ │ ├── key: (1)
+ │ └── scan t@j_idx,inverted
+ │ ├── columns: k:1(int!null) j_inverted_key:5(encodedkey!null)
+ │ ├── inverted constraint: /5/1
+ │ │ └── spans
+ │ │ ├── ["a"/"b"/Arr/"c", "a"/"b"/Arr/"c"]
+ │ │ ├── ["a"/"b"/Arr/"d", "a"/"b"/Arr/"d"]
+ │ │ └── ["a"/"b"/Arr/"e", "a"/"b"/Arr/"e"]
+ │ └── stats: [rows=4e-07, distinct(1)=4e-07, null(1)=0, distinct(5)=4e-07, null(5)=0]
+ │ histogram(5)=
└── filters
└── (j:2->'a') = '{"b": ["c", "d", "e"]}' [type=bool, outer=(2), immutable]
@@ -1088,13 +1157,28 @@ select
│ ├── stats: [rows=4e-07]
│ ├── key: (1)
│ ├── fd: (1)-->(2)
- │ └── scan t@j_idx,inverted
+ │ └── inverted-filter
│ ├── columns: k:1(int!null)
- │ ├── inverted constraint: /5/1
- │ │ └── spans: ["a"/Arr/1, "a"/Arr/1]
- │ ├── stats: [rows=4e-07, distinct(5)=4e-07, null(5)=0]
- │ │ histogram(5)=
- │ └── key: (1)
+ │ ├── inverted expression: /5
+ │ │ ├── tight: false, unique: true
+ │ │ ├── union spans: empty
+ │ │ └── INTERSECTION
+ │ │ ├── span expression
+ │ │ │ ├── tight: true, unique: true
+ │ │ │ └── union spans: ["a"/Arr/1, "a"/Arr/1]
+ │ │ └── span expression
+ │ │ ├── tight: true, unique: true
+ │ │ └── union spans: ["a"/Arr/2, "a"/Arr/2]
+ │ ├── stats: [rows=4e-07]
+ │ ├── key: (1)
+ │ └── scan t@j_idx,inverted
+ │ ├── columns: k:1(int!null) j_inverted_key:5(encodedkey!null)
+ │ ├── inverted constraint: /5/1
+ │ │ └── spans
+ │ │ ├── ["a"/Arr/1, "a"/Arr/1]
+ │ │ └── ["a"/Arr/2, "a"/Arr/2]
+ │ └── stats: [rows=4e-07, distinct(1)=4e-07, null(1)=0, distinct(5)=4e-07, null(5)=0]
+ │ histogram(5)=
└── filters
└── (j:2->'a') @> '[1, 2]' [type=bool, outer=(2), immutable]
diff --git a/pkg/sql/opt/props/histogram.go b/pkg/sql/opt/props/histogram.go
index 2f015223ad69..0af4302b0557 100644
--- a/pkg/sql/opt/props/histogram.go
+++ b/pkg/sql/opt/props/histogram.go
@@ -121,66 +121,6 @@ func (h *Histogram) ValuesCount() float64 {
return count
}
-// EqEstimate returns the estimated number of rows that equal the given
-// datum. If the datum is equal to a bucket's upperbound, it returns the
-// bucket's NumEq. If the datum falls in the range of a bucket's upper and lower
-// bounds, it returns the bucket's NumRange divided by the bucket's
-// DistinctRange. Otherwise, if the datum does not fall into any bucket in the
-// histogram or any comparison between the datum and a bucket's upperbound
-// results in an error, then it returns the total number of values in the
-// histogram divided by the total number of distinct values.
-func (h *Histogram) EqEstimate(ctx context.Context, d tree.Datum) float64 {
- // Find the bucket belonging to the datum. It is the first bucket where the
- // datum is less than or equal to the upperbound.
- bucketIdx := binarySearch(len(h.buckets), func(i int) (bool, error) {
- cmp, err := d.Compare(ctx, h.evalCtx, h.upperBound(i))
- return cmp <= 0, err
- })
- if bucketIdx < len(h.buckets) {
- if cmp, err := d.Compare(ctx, h.evalCtx, h.upperBound(bucketIdx)); err == nil {
- if cmp == 0 {
- return h.numEq(bucketIdx)
- }
- if bucketIdx != 0 {
- if h.distinctRange(bucketIdx) == 0 {
- // Avoid dividing by zero.
- return 0
- }
- return h.numRange(bucketIdx) / h.distinctRange(bucketIdx)
- }
- // The value d is less than the upper bound of the first bucket, so
- // it is outside the bounds of the histogram. Fallback to the total
- // number of values divided by the total number of distinct values.
- }
- }
- totalDistinct := h.DistinctValuesCount()
- if totalDistinct == 0 {
- // Avoid dividing by zero.
- return 0
- }
- return h.ValuesCount() / h.DistinctValuesCount()
-}
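
A minimal, self-contained sketch of the estimation rules described in the removed docstring above, using a hypothetical `bucket` type and made-up counts rather than the `props.Histogram`/`cat.HistogramBucket` API (the real behavior is exercised by `TestEqEstimate`, removed further down):

```go
package main

import (
	"fmt"
	"sort"
)

// bucket is a hypothetical stand-in for a histogram bucket.
type bucket struct {
	upperBound    int
	numEq         float64
	numRange      float64
	distinctRange float64
}

// eqEstimate mirrors the removed rules: an exact match on an upper bound
// returns NumEq, a value inside a bucket's range returns NumRange/DistinctRange,
// and anything outside the histogram falls back to values/distinct.
func eqEstimate(buckets []bucket, d int, totalVals, totalDistinct float64) float64 {
	i := sort.Search(len(buckets), func(i int) bool { return d <= buckets[i].upperBound })
	if i < len(buckets) {
		if d == buckets[i].upperBound {
			return buckets[i].numEq
		}
		if i != 0 {
			if buckets[i].distinctRange == 0 {
				return 0
			}
			return buckets[i].numRange / buckets[i].distinctRange
		}
	}
	if totalDistinct == 0 {
		return 0
	}
	return totalVals / totalDistinct
}

func main() {
	buckets := []bucket{
		{upperBound: 1, numEq: 1},
		{upperBound: 10, numEq: 3, numRange: 3, distinctRange: 2},
	}
	fmt.Println(eqEstimate(buckets, 9, 7, 4))  // 1.5: inside (1, 10), so NumRange/DistinctRange = 3/2
	fmt.Println(eqEstimate(buckets, 10, 7, 4)) // 3: exact match on an upper bound
	fmt.Println(eqEstimate(buckets, 0, 7, 4))  // 1.75: outside the histogram, falls back to 7/4
}
```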
-
-// binarySearch extends sort.Search to allow the search function to return an
-// error. It returns the smallest index i in [0, n) at which f(i) is true,
-// assuming that on the range [0, n), f(i) == true implies f(i+1) == true. If
-// there is no such index, or if f returns an error for any invocation, it
-// returns n.
-func binarySearch(n int, f func(int) (bool, error)) (idx int) {
- defer func() {
- if r := recover(); r != nil {
- idx = n
- }
- }()
- return sort.Search(n, func(i int) bool {
- res, err := f(i)
- if err != nil {
- panic(err)
- }
- return res
- })
-}
-
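
The removed binarySearch helper adapts sort.Search to a predicate that can fail, converting the error into a panic and recovering it at the top. A standalone sketch of the same pattern, with a hypothetical slice and predicate rather than the histogram code:

```go
package main

import (
	"errors"
	"fmt"
	"sort"
)

// searchWithErr returns the smallest i in [0, n) at which f(i) is true, or n
// if there is no such index or f returns an error for any invocation.
func searchWithErr(n int, f func(int) (bool, error)) (idx int) {
	defer func() {
		if r := recover(); r != nil {
			idx = n
		}
	}()
	return sort.Search(n, func(i int) bool {
		ok, err := f(i)
		if err != nil {
			panic(err)
		}
		return ok
	})
}

func main() {
	vals := []int{1, 10, 25, 30, 42}
	// First index whose value is >= 11.
	fmt.Println(searchWithErr(len(vals), func(i int) (bool, error) { return vals[i] >= 11, nil })) // 2
	// A failing predicate aborts the search and returns n.
	fmt.Println(searchWithErr(len(vals), func(i int) (bool, error) { return false, errors.New("boom") })) // 5
}
```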
// DistinctValuesCount returns the estimated number of distinct values in the
// histogram.
func (h *Histogram) DistinctValuesCount() float64 {
diff --git a/pkg/sql/opt/props/histogram_test.go b/pkg/sql/opt/props/histogram_test.go
index 7b29d7a082ae..f5ec9a58675f 100644
--- a/pkg/sql/opt/props/histogram_test.go
+++ b/pkg/sql/opt/props/histogram_test.go
@@ -24,55 +24,6 @@ import (
"github.com/cockroachdb/errors"
)
-func TestEqEstimate(t *testing.T) {
- ctx := context.Background()
- evalCtx := eval.MakeTestingEvalContext(cluster.MakeTestingClusterSettings())
-
- emptyHist := &Histogram{}
- emptyHist.Init(&evalCtx, opt.ColumnID(1), []cat.HistogramBucket{})
-
- if eq := emptyHist.EqEstimate(ctx, tree.NewDInt(0)); eq != 0 {
- t.Errorf("expected %f but found %f", 0.0, eq)
- }
-
- // 0 1 3 3 4 5 0 0 40 35
- // <--- 1 --- 10 --- 25 --- 30 ---- 42
- histData := []cat.HistogramBucket{
- {NumRange: 0, DistinctRange: 0, NumEq: 1, UpperBound: tree.NewDInt(1)},
- {NumRange: 3, DistinctRange: 2, NumEq: 3, UpperBound: tree.NewDInt(10)},
- {NumRange: 4, DistinctRange: 2, NumEq: 5, UpperBound: tree.NewDInt(25)},
- {NumRange: 0, DistinctRange: 0, NumEq: 0, UpperBound: tree.NewDInt(30)},
- {NumRange: 40, DistinctRange: 7, NumEq: 35, UpperBound: tree.NewDInt(42)},
- }
- h := &Histogram{}
- h.Init(&evalCtx, opt.ColumnID(1), histData)
-
- testData := []struct {
- datum tree.Datum
- expected float64
- }{
- {tree.NewDInt(1), 1},
- {tree.NewDInt(9), 3.0 / 2},
- {tree.NewDInt(10), 3},
- {tree.NewDInt(11), 4.0 / 2},
- {tree.NewDInt(25), 5},
- {tree.NewDInt(28), 0},
- {tree.NewDInt(30), 0},
- {tree.NewDInt(35), 40.0 / 7},
- {tree.NewDInt(42), 35},
- // Use an all-bucket average for values outside the bounds of the
- // histogram.
- {tree.NewDInt(0), h.ValuesCount() / h.DistinctValuesCount()},
- {tree.NewDInt(43), h.ValuesCount() / h.DistinctValuesCount()},
- }
-
- for i, tc := range testData {
- if eq := h.EqEstimate(ctx, tc.datum); eq != tc.expected {
- t.Errorf("testcase %d: expected %f but found %f", i, tc.expected, eq)
- }
- }
-}
-
func TestCanFilter(t *testing.T) {
ctx := context.Background()
evalCtx := eval.MakeTestingEvalContext(cluster.MakeTestingClusterSettings())
diff --git a/pkg/sql/opt/xform/rules/select.opt b/pkg/sql/opt/xform/rules/select.opt
index 87f3e8a2cb53..03e44ded434e 100644
--- a/pkg/sql/opt/xform/rules/select.opt
+++ b/pkg/sql/opt/xform/rules/select.opt
@@ -53,22 +53,6 @@
=>
(GenerateInvertedIndexScans $scanPrivate $filters)
-# GenerateMinimalInvertedIndexScans is similar to GenerateInvertedIndexScans. It
-# differs by trying to generate an inverted index scan that spans the fewest
-# index keys, rather than generating scans that span all index keys in the
-# expression and performing set operations on them before an index-join.
-[GenerateMinimalInvertedIndexScans, Explore]
-(Select
- $input:(Scan
- $scanPrivate:* &
- (IsCanonicalScan $scanPrivate) &
- (HasInvertedIndexes $scanPrivate)
- )
- $filters:*
-)
-=>
-(GenerateMinimalInvertedIndexScans $input $scanPrivate $filters)
-
# GenerateTrigramSimilarityInvertedIndexScans generates scans on inverted
# trigram indexes that are constrained by similarity filters (e.g.,
# `s & % 'foo'`). It is similar conceptually to GenerateInvertedIndexScans, but
diff --git a/pkg/sql/opt/xform/select_funcs.go b/pkg/sql/opt/xform/select_funcs.go
index 4002df08b56e..0591c45fab88 100644
--- a/pkg/sql/opt/xform/select_funcs.go
+++ b/pkg/sql/opt/xform/select_funcs.go
@@ -6,7 +6,6 @@
package xform
import (
- "context"
"sort"
"github.com/cockroachdb/cockroach/pkg/sql/inverted"
@@ -16,7 +15,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/opt/invertedidx"
"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
"github.com/cockroachdb/cockroach/pkg/sql/opt/partition"
- "github.com/cockroachdb/cockroach/pkg/sql/opt/props"
"github.com/cockroachdb/cockroach/pkg/sql/opt/props/physical"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/types"
@@ -841,47 +839,6 @@ func (c *CustomFuncs) GenerateInvertedIndexScans(
scanPrivate *memo.ScanPrivate,
filters memo.FiltersExpr,
) {
- c.generateInvertedIndexScansImpl(
- grp,
- nil, /* input */
- scanPrivate,
- filters,
- false, /* minimizeSpans */
- )
-}
-
-// GenerateMinimalInvertedIndexScans is similar to GenerateInvertedIndexScans.
-// It differs by trying to generate an inverted index scan that spans the fewest
-// index keys, rather than generating scans that span all index keys in the
-// expression and performing set operations on them before an index-join. It
-// currently only works on JSON and array inverted indexes.
-//
-// TODO(mgartner): It may be simpler to implement these scans with constraints
-// rather than inverted spans. It may also allow more fine-grained control over
-// the remaining filters applied after the scan.
-func (c *CustomFuncs) GenerateMinimalInvertedIndexScans(
- grp memo.RelExpr,
- required *physical.Required,
- input memo.RelExpr,
- scanPrivate *memo.ScanPrivate,
- filters memo.FiltersExpr,
-) {
- c.generateInvertedIndexScansImpl(grp, input, scanPrivate, filters, true /* minimizeSpans */)
-}
-
-// generateInvertedIndexScansImpl is the implementation of
-// GenerateInvertedIndexScans and GenerateMinimalInvertedIndexScans.
-func (c *CustomFuncs) generateInvertedIndexScansImpl(
- grp memo.RelExpr,
- input memo.RelExpr,
- scanPrivate *memo.ScanPrivate,
- filters memo.FiltersExpr,
- minimizeSpans bool,
-) {
- if input == nil && minimizeSpans {
- panic(errors.AssertionFailedf("expected non-nil input required to reduce spans"))
- }
-
var pkCols opt.ColSet
var sb indexScanBuilder
sb.Init(c, scanPrivate.Table)
@@ -897,17 +854,8 @@ func (c *CustomFuncs) generateInvertedIndexScansImpl(
var iter scanIndexIter
iter.Init(c.e.evalCtx, c.e, c.e.mem, &c.im, scanPrivate, filters, rejectNonInvertedIndexes)
iter.ForEach(func(index cat.Index, filters memo.FiltersExpr, indexCols opt.ColSet, _ bool, _ memo.ProjectionsExpr) {
- invColID := scanPrivate.Table.ColumnID(index.InvertedColumn().InvertedSourceColumnOrdinal())
- invColTypeFamily := c.e.f.Metadata().ColumnMeta(invColID).Type.Family()
- jsonOrArray := invColTypeFamily == types.JsonFamily || invColTypeFamily == types.ArrayFamily
-
- // Only attempt to reduce spans for JSON and array inverted indexes.
- if minimizeSpans && !jsonOrArray {
- return
- }
-
// Check whether the filter can constrain the index.
- spanExpr, con, remainingFilters, pfState, ok := invertedidx.TryFilterInvertedIndex(
+ spanExpr, constraint, remainingFilters, pfState, ok := invertedidx.TryFilterInvertedIndex(
c.e.ctx, c.e.evalCtx, c.e.f, filters, optionalFilters, scanPrivate.Table, index, tabMeta.ComputedCols,
c.checkCancellation,
)
@@ -916,18 +864,6 @@ func (c *CustomFuncs) generateInvertedIndexScansImpl(
// generated.
return
}
- if minimizeSpans {
- newSpanExpr, ok := reduceInvertedSpans(c.e.ctx, input, scanPrivate.Table, index, spanExpr)
- if !ok {
- // The span expression could not be reduced, so skip this index.
- // An inverted index scan may still be generated for it when
- // minimizeSpans=false.
- return
- }
- spanExpr = newSpanExpr
- // If the span was reduced, the original filters must be applied.
- remainingFilters = filters
- }
spansToRead := spanExpr.SpansToRead
// Override the filters with remainingFilters. If the index is a
// multi-column inverted index, the non-inverted prefix columns are
@@ -953,7 +889,7 @@ func (c *CustomFuncs) generateInvertedIndexScansImpl(
newScanPrivate := *scanPrivate
newScanPrivate.Distribution.Regions = nil
newScanPrivate.Index = index.Ordinal()
- newScanPrivate.SetConstraint(c.e.ctx, c.e.evalCtx, con)
+ newScanPrivate.SetConstraint(c.e.ctx, c.e.evalCtx, constraint)
newScanPrivate.InvertedConstraint = spansToRead
if scanPrivate.Flags.NoIndexJoin {
@@ -999,86 +935,6 @@ func (c *CustomFuncs) generateInvertedIndexScansImpl(
})
}
-// reduceInvertedSpans attempts to reduce the spans-to-scan in the given span
-// expression by finding the lowest cardinality, conjunctive span. If the given
-// span expression cannot be reduced, ok=false is returned.
-func reduceInvertedSpans(
- ctx context.Context,
- grp memo.RelExpr,
- tabID opt.TableID,
- index cat.Index,
- spanExpr *inverted.SpanExpression,
-) (newSpan *inverted.SpanExpression, ok bool) {
- // Span expressions that are not unions or intersections cannot be reduced.
- if spanExpr.Operator == inverted.None {
- return nil, false
- }
-
- colID := tabID.ColumnID(index.InvertedColumn().Ordinal())
- colStat, ok := grp.Memo().RequestColStat(grp, opt.MakeColSet(colID))
- if !ok || colStat.Histogram == nil {
- // Only attempt to reduce spans if we have histogram statistics.
- // TODO(mgartner): We could blindly reduce the spans without a
- // histogram, which will probably be better than doing nothing.
- return nil, false
- }
- histogram := colStat.Histogram
-
- var lowestCardinality float64
- var findLowestCardinalitySpan func(span *inverted.SpanExpression)
- findLowestCardinalitySpan = func(span *inverted.SpanExpression) {
- switch span.Operator {
- case inverted.SetIntersection:
- // Recurse into each side looking for the lowest cardinality span.
- if len(span.FactoredUnionSpans) > 0 {
- // Check that FactoredUnionSpans is empty. A span expression
- // with non-empty FactoredUnionSpans is equivalent to a UNION
- // between the FactoredUnionSpans and the intersected children,
- // so we can't reduce the span.
- return
- }
- l, ok := span.Left.(*inverted.SpanExpression)
- if !ok {
- return
- }
- r, ok := span.Right.(*inverted.SpanExpression)
- if !ok {
- return
- }
- findLowestCardinalitySpan(l)
- findLowestCardinalitySpan(r)
- case inverted.SetUnion, inverted.None:
- // We cannot recurse into unions because both sides must be scanned.
- // So we consider a union a "leaf".
- cardinality, ok := cardinalityEstimate(ctx, histogram, span)
- if ok && (newSpan == nil || cardinality < lowestCardinality) {
- newSpan = span
- lowestCardinality = cardinality
- }
- }
- }
- findLowestCardinalitySpan(spanExpr)
-
- return newSpan, newSpan != nil
-}
-
-// cardinalityEstimate returns an estimated number of rows that will be scanned
-// with spanExpr based on the given histogram.
-func cardinalityEstimate(
- ctx context.Context, histogram *props.Histogram, spanExpr *inverted.SpanExpression,
-) (cardinality float64, ok bool) {
- for i := range spanExpr.SpansToRead {
- span := spanExpr.SpansToRead[i]
- if !span.IsSingleVal() {
- // We can currently only estimate the cardinality of single-valued
- // spans.
- return 0, false
- }
- cardinality += histogram.EqEstimate(ctx, tree.NewDEncodedKey(tree.DEncodedKey(span.Start)))
- }
- return cardinality, true
-}
-
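
As a rough illustration of the removed reduction, using the per-value counts from the statistics injected for the `min` table further down (the array value 1 appears in about 1990 index entries, the value 3 in about 10): for `a @> '{1}' AND a @> '{3}'` the intersection has two single-value leaves, and the lower-cardinality one is kept, matching the removed "Scan over 3" test case below. A toy sketch of that selection step, with hypothetical estimates:

```go
package main

import "fmt"

func main() {
	// Hypothetical per-span estimates, mirroring cardinalityEstimate summing
	// EqEstimate over single-value spans.
	estimates := map[string]float64{"[1, 1]": 1990, "[3, 3]": 10}

	// reduceInvertedSpans keeps the conjunctive leaf with the lowest estimate.
	best, bestEst := "", 0.0
	for span, est := range estimates {
		if best == "" || est < bestEst {
			best, bestEst = span, est
		}
	}
	fmt.Println(best, bestEst) // [3, 3] 10
}
```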
// GenerateTrigramSimilarityInvertedIndexScans generates scans on inverted
// trigram indexes that are constrained by similarity filters (e.g.,
// `s % 'foo'`). It is similar conceptually to GenerateInvertedIndexScans, but
diff --git a/pkg/sql/opt/xform/testdata/rules/select b/pkg/sql/opt/xform/testdata/rules/select
index 968106f49268..e53bb7b15508 100644
--- a/pkg/sql/opt/xform/testdata/rules/select
+++ b/pkg/sql/opt/xform/testdata/rules/select
@@ -8517,426 +8517,6 @@ project
└── projections
└── 1 [as="?column?":15]
-
-# --------------------------------------------------
-# GenerateMinimalInvertedIndexScans
-# --------------------------------------------------
-
-exec-ddl
-CREATE TABLE min (
- k INT PRIMARY KEY,
- j JSON,
- a INT[],
- INVERTED INDEX j_idx (j),
- INVERTED INDEX a_idx (a)
-)
-----
-
-# Histogram boundaries are for arrays with values 1, 2, 3, and 4, including some
-# empty arrays. The row_count is lower than the sum of the histogram buckets
-# num_eq's because some rows can have multiple inverted index entries, for
-# example `{1, 2}`. There are:
-#
-# - 2000 rows total
-# - 10 empty arrays
-# - 1990 arrays encoded into 2020 index entries
-#
-# Histogram boundaries are for JSON values `[]`, `{}`, `[1]`, `[2]`, `[3]`,
-# `{"a": "b"}`, `{"c": "d"}`, and `{"e": "f"}`. The row_count is lower than the
-# sum of the histogram buckets num_eq's because some rows can have multiple
-# inverted index entries, for example `{"a": "b", "c": "d"}`. There are:
-#
-# - 2000 rows total
-# - 10 empty arrays
-# - 990 arrays encoded into 1110 index entries
-# - 10 empty objects
-# - 990 objects encoded into 1110 index entries
-#
-exec-ddl
-ALTER TABLE min INJECT STATISTICS '[
- {
- "columns": ["a"],
- "created_at": "2018-01-01 1:00:00.00000+00:00",
- "row_count": 2000,
- "distinct_count": 3,
- "null_count": 0,
- "histo_col_type": "BYTES",
- "histo_buckets": [
- {
- "distinct_range": 0,
- "num_eq": 10,
- "num_range": 0,
- "upper_bound": "\\x43"
- },
- {
- "distinct_range": 0,
- "num_eq": 1990,
- "num_range": 0,
- "upper_bound": "\\x89"
- },
- {
- "distinct_range": 0,
- "num_eq": 10,
- "num_range": 0,
- "upper_bound": "\\x8a"
- },
- {
- "distinct_range": 0,
- "num_eq": 10,
- "num_range": 0,
- "upper_bound": "\\x8b"
- },
- {
- "distinct_range": 0,
- "num_eq": 10,
- "num_range": 0,
- "upper_bound": "\\x8c"
- }
- ]
- },
- {
- "columns": ["j"],
- "created_at": "2018-01-01 1:00:00.00000+00:00",
- "row_count": 2000,
- "distinct_count": 10,
- "null_count": 0,
- "histo_col_type": "BYTES",
- "histo_buckets": [
- {
- "distinct_range": 0,
- "num_eq": 10,
- "num_range": 0,
- "upper_bound": "\\x37000138"
- },
- {
- "distinct_range": 0,
- "num_eq": 10,
- "num_range": 0,
- "upper_bound": "\\x37000139"
- },
- {
- "distinct_range": 0,
- "num_eq": 990,
- "num_range": 0,
- "upper_bound": "\\x37000300012a0200"
- },
- {
- "distinct_range": 0,
- "num_eq": 100,
- "num_range": 0,
- "upper_bound": "\\x37000300012a0400"
- },
- {
- "distinct_range": 0,
- "num_eq": 10,
- "num_range": 0,
- "upper_bound": "\\x37000300012a0600"
- },
- {
- "distinct_range": 0,
- "num_eq": 990,
- "num_range": 0,
- "upper_bound": "\\x3761000112620001"
- },
- {
- "distinct_range": 0,
- "num_eq": 100,
- "num_range": 0,
- "upper_bound": "\\x3763000112640001"
- },
- {
- "distinct_range": 0,
- "num_eq": 10,
- "num_range": 0,
- "upper_bound": "\\x3765000112660001"
- }
- ]
- }
-]'
-----
-
-# Scan over 3 since there are fewer rows containing 3 than 1.
-# TODO(mgartner): The remaining filters could be reduced.
-opt expect=GenerateMinimalInvertedIndexScans
-SELECT * FROM min WHERE a @> '{1}' AND a @> '{3}'
-----
-select
- ├── columns: k:1!null j:2 a:3!null
- ├── immutable
- ├── key: (1)
- ├── fd: (1)-->(2,3)
- ├── index-join min
- │ ├── columns: k:1!null j:2 a:3
- │ ├── key: (1)
- │ ├── fd: (1)-->(2,3)
- │ └── scan min@a_idx,inverted
- │ ├── columns: k:1!null
- │ ├── inverted constraint: /7/1
- │ │ └── spans: [3, 3]
- │ └── key: (1)
- └── filters
- ├── a:3 @> ARRAY[1] [outer=(3), immutable, constraints=(/3: (/NULL - ])]
- └── a:3 @> ARRAY[3] [outer=(3), immutable, constraints=(/3: (/NULL - ])]
-
-# Scan over 2 since there are fewer rows containing 2 than 1.
-opt expect=GenerateMinimalInvertedIndexScans
-SELECT * FROM min WHERE a @> '{1, 2}'
-----
-select
- ├── columns: k:1!null j:2 a:3!null
- ├── immutable
- ├── key: (1)
- ├── fd: (1)-->(2,3)
- ├── index-join min
- │ ├── columns: k:1!null j:2 a:3
- │ ├── key: (1)
- │ ├── fd: (1)-->(2,3)
- │ └── scan min@a_idx,inverted
- │ ├── columns: k:1!null
- │ ├── inverted constraint: /7/1
- │ │ └── spans: [2, 2]
- │ └── key: (1)
- └── filters
- └── a:3 @> ARRAY[1,2] [outer=(3), immutable, constraints=(/3: (/NULL - ])]
-
-opt expect=GenerateMinimalInvertedIndexScans
-SELECT * FROM min@a_idx WHERE a @> '{2}' AND (a @> '{1}' OR a @> '{3}')
-----
-select
- ├── columns: k:1!null j:2 a:3!null
- ├── immutable
- ├── key: (1)
- ├── fd: (1)-->(2,3)
- ├── index-join min
- │ ├── columns: k:1!null j:2 a:3
- │ ├── key: (1)
- │ ├── fd: (1)-->(2,3)
- │ └── scan min@a_idx,inverted
- │ ├── columns: k:1!null
- │ ├── inverted constraint: /7/1
- │ │ └── spans: [2, 2]
- │ ├── flags: force-index=a_idx
- │ └── key: (1)
- └── filters
- ├── a:3 @> ARRAY[2] [outer=(3), immutable, constraints=(/3: (/NULL - ])]
- └── (a:3 @> ARRAY[1]) OR (a:3 @> ARRAY[3]) [outer=(3), immutable, constraints=(/3: (/NULL - ])]
-
-opt expect=GenerateMinimalInvertedIndexScans
-SELECT * FROM min@a_idx WHERE (a @> '{2}' OR a @> '{4}') AND a @> '{1}'
-----
-select
- ├── columns: k:1!null j:2 a:3!null
- ├── immutable
- ├── key: (1)
- ├── fd: (1)-->(2,3)
- ├── index-join min
- │ ├── columns: k:1!null j:2 a:3
- │ ├── key: (1)
- │ ├── fd: (1)-->(2,3)
- │ └── inverted-filter
- │ ├── columns: k:1!null
- │ ├── inverted expression: /7
- │ │ ├── tight: true, unique: false
- │ │ └── union spans
- │ │ ├── [2, 2]
- │ │ └── [4, 4]
- │ ├── key: (1)
- │ └── scan min@a_idx,inverted
- │ ├── columns: k:1!null a_inverted_key:7!null
- │ ├── inverted constraint: /7/1
- │ │ └── spans
- │ │ ├── [2, 2]
- │ │ └── [4, 4]
- │ └── flags: force-index=a_idx
- └── filters
- ├── (a:3 @> ARRAY[2]) OR (a:3 @> ARRAY[4]) [outer=(3), immutable, constraints=(/3: (/NULL - ])]
- └── a:3 @> ARRAY[1] [outer=(3), immutable, constraints=(/3: (/NULL - ])]
-
-# TODO(mgartner): Scanning [2 - 3] would be better, but the current
-# implementation can only estimate the row count for single-value spans.
-opt expect=GenerateMinimalInvertedIndexScans
-SELECT * FROM min@a_idx WHERE (a @> '{2}' OR a @> '{3}') AND a @> '{1}'
-----
-index-join min
- ├── columns: k:1!null j:2 a:3!null
- ├── immutable
- ├── key: (1)
- ├── fd: (1)-->(2,3)
- └── inverted-filter
- ├── columns: k:1!null
- ├── inverted expression: /7
- │ ├── tight: true, unique: false
- │ ├── union spans: empty
- │ └── INTERSECTION
- │ ├── span expression
- │ │ ├── tight: true, unique: false
- │ │ └── union spans: [2, 4)
- │ └── span expression
- │ ├── tight: true, unique: true
- │ └── union spans: [1, 1]
- ├── key: (1)
- └── scan min@a_idx,inverted
- ├── columns: k:1!null a_inverted_key:7!null
- ├── inverted constraint: /7/1
- │ └── spans: [1, 4)
- └── flags: force-index=a_idx
-
-opt expect=GenerateMinimalInvertedIndexScans
-SELECT * FROM min@a_idx WHERE a @> '{2, 3}' AND a @> '{1}'
-----
-select
- ├── columns: k:1!null j:2 a:3!null
- ├── immutable
- ├── key: (1)
- ├── fd: (1)-->(2,3)
- ├── index-join min
- │ ├── columns: k:1!null j:2 a:3
- │ ├── key: (1)
- │ ├── fd: (1)-->(2,3)
- │ └── scan min@a_idx,inverted
- │ ├── columns: k:1!null
- │ ├── inverted constraint: /7/1
- │ │ └── spans: [2, 2]
- │ ├── flags: force-index=a_idx
- │ └── key: (1)
- └── filters
- ├── a:3 @> ARRAY[2,3] [outer=(3), immutable, constraints=(/3: (/NULL - ])]
- └── a:3 @> ARRAY[1] [outer=(3), immutable, constraints=(/3: (/NULL - ])]
-
-# The rule only applies when there are multiple spans to reduce.
-opt expect-not=GenerateMinimalInvertedIndexScans format=hide-all
-SELECT * FROM min WHERE a @> '{1}'
-----
-select
- ├── scan min
- └── filters
- └── a @> ARRAY[1]
-
-opt expect=GenerateMinimalInvertedIndexScans
-SELECT * FROM min@j_idx WHERE j @> '[1]' AND j @> '[3]'
-----
-select
- ├── columns: k:1!null j:2!null a:3
- ├── immutable
- ├── key: (1)
- ├── fd: (1)-->(2,3)
- ├── index-join min
- │ ├── columns: k:1!null j:2 a:3
- │ ├── key: (1)
- │ ├── fd: (1)-->(2,3)
- │ └── scan min@j_idx,inverted
- │ ├── columns: k:1!null
- │ ├── inverted constraint: /6/1
- │ │ └── spans: [Arr/3, Arr/3]
- │ ├── flags: force-index=j_idx
- │ └── key: (1)
- └── filters
- ├── j:2 @> '[1]' [outer=(2), immutable, constraints=(/2: (/NULL - ])]
- └── j:2 @> '[3]' [outer=(2), immutable, constraints=(/2: (/NULL - ])]
-
-opt expect=GenerateMinimalInvertedIndexScans
-SELECT * FROM min@j_idx WHERE j @> '[2]' AND j @> '[3]'
-----
-select
- ├── columns: k:1!null j:2!null a:3
- ├── immutable
- ├── key: (1)
- ├── fd: (1)-->(2,3)
- ├── index-join min
- │ ├── columns: k:1!null j:2 a:3
- │ ├── key: (1)
- │ ├── fd: (1)-->(2,3)
- │ └── scan min@j_idx,inverted
- │ ├── columns: k:1!null
- │ ├── inverted constraint: /6/1
- │ │ └── spans: [Arr/3, Arr/3]
- │ ├── flags: force-index=j_idx
- │ └── key: (1)
- └── filters
- ├── j:2 @> '[2]' [outer=(2), immutable, constraints=(/2: (/NULL - ])]
- └── j:2 @> '[3]' [outer=(2), immutable, constraints=(/2: (/NULL - ])]
-
-# The rule only applies when there are multiple spans to reduce.
-opt expect-not=GenerateMinimalInvertedIndexScans format=hide-all
-SELECT * FROM min WHERE j @> '[3]'
-----
-index-join min
- └── scan min@j_idx,inverted
- └── inverted constraint: /6/1
- └── spans: [Arr/3, Arr/3]
-
-# The rule does not apply when for a disjunction of spans.
-opt expect-not=GenerateMinimalInvertedIndexScans format=hide-all
-SELECT * FROM b WHERE j @> '[3]' OR j @> '[[1, 2]]'
-----
-select
- ├── index-join b
- │ └── inverted-filter
- │ ├── inverted expression: /9
- │ │ ├── tight: false, unique: true
- │ │ ├── union spans: [Arr/3, Arr/3]
- │ │ └── INTERSECTION
- │ │ ├── span expression
- │ │ │ ├── tight: true, unique: true
- │ │ │ └── union spans: [Arr/Arr/1, Arr/Arr/1]
- │ │ └── span expression
- │ │ ├── tight: true, unique: true
- │ │ └── union spans: [Arr/Arr/2, Arr/Arr/2]
- │ └── scan b@j_inv_idx,inverted
- │ └── inverted constraint: /9/1
- │ └── spans
- │ ├── [Arr/3, Arr/3]
- │ ├── [Arr/Arr/1, Arr/Arr/1]
- │ └── [Arr/Arr/2, Arr/Arr/2]
- └── filters
- └── (j @> '[3]') OR (j @> '[[1, 2]]')
-
-opt expect=GenerateMinimalInvertedIndexScans disable=GenerateInvertedIndexZigzagJoins
-SELECT * FROM min@j_idx WHERE j->'a' = '"b"' AND j->'c' = '"d"'
-----
-select
- ├── columns: k:1!null j:2 a:3
- ├── immutable
- ├── key: (1)
- ├── fd: (1)-->(2,3)
- ├── index-join min
- │ ├── columns: k:1!null j:2 a:3
- │ ├── key: (1)
- │ ├── fd: (1)-->(2,3)
- │ └── scan min@j_idx,inverted
- │ ├── columns: k:1!null
- │ ├── inverted constraint: /6/1
- │ │ └── spans: ["c"/"d", "c"/"d"]
- │ ├── flags: force-index=j_idx
- │ └── key: (1)
- └── filters
- ├── (j:2->'a') = '"b"' [outer=(2), immutable]
- └── (j:2->'c') = '"d"' [outer=(2), immutable]
-
-opt expect=GenerateMinimalInvertedIndexScans
-SELECT * FROM min@j_idx WHERE j->'a' = '"b"' AND j->'c' = '"d"' AND j->'e' = '"f"'
-----
-select
- ├── columns: k:1!null j:2 a:3
- ├── immutable
- ├── key: (1)
- ├── fd: (1)-->(2,3)
- ├── index-join min
- │ ├── columns: k:1!null j:2 a:3
- │ ├── key: (1)
- │ ├── fd: (1)-->(2,3)
- │ └── scan min@j_idx,inverted
- │ ├── columns: k:1!null
- │ ├── inverted constraint: /6/1
- │ │ └── spans: ["e"/"f", "e"/"f"]
- │ ├── flags: force-index=j_idx
- │ └── key: (1)
- └── filters
- ├── (j:2->'a') = '"b"' [outer=(2), immutable]
- ├── (j:2->'c') = '"d"' [outer=(2), immutable]
- └── (j:2->'e') = '"f"' [outer=(2), immutable]
-
-
# --------------------------------------------------
# GenerateZigzagJoins
# --------------------------------------------------
@@ -10094,7 +9674,7 @@ ALTER TABLE b INJECT STATISTICS '[
# Query only the primary key with a remaining filter. 2+ paths in containment
# query should favor zigzag joins.
-opt expect=GenerateInvertedIndexZigzagJoins disable=GenerateMinimalInvertedIndexScans
+opt expect=GenerateInvertedIndexZigzagJoins
SELECT k FROM b WHERE j @> '{"a": "b", "c": "d"}'
----
project
@@ -10117,7 +9697,7 @@ project
└── filters (true)
# Query requiring a zigzag join with a remaining filter.
-opt expect=GenerateInvertedIndexZigzagJoins disable=GenerateMinimalInvertedIndexScans
+opt expect=GenerateInvertedIndexZigzagJoins
SELECT j, k FROM b WHERE j @> '{"a": "b", "c": "d"}'
----
inner-join (lookup b)
@@ -10135,7 +9715,7 @@ inner-join (lookup b)
│ └── filters (true)
└── filters (true)
-opt expect=GenerateInvertedIndexZigzagJoins disable=GenerateMinimalInvertedIndexScans
+opt expect=GenerateInvertedIndexZigzagJoins
SELECT * FROM b WHERE j @> '{"a": {"b": "c", "d": "e"}, "f": "g"}'
----
inner-join (lookup b)
@@ -10155,7 +9735,7 @@ inner-join (lookup b)
└── j:4 @> '{"a": {"b": "c", "d": "e"}, "f": "g"}' [outer=(4), immutable, constraints=(/4: (/NULL - ])]
# Three or more paths. Should generate zigzag joins.
-opt expect=GenerateInvertedIndexZigzagJoins disable=GenerateMinimalInvertedIndexScans
+opt expect=GenerateInvertedIndexZigzagJoins
SELECT * FROM b WHERE j @> '{"a":[{"b":"c", "d":3}, 5]}'
----
inner-join (lookup b)
@@ -10232,7 +9812,7 @@ ALTER TABLE c INJECT STATISTICS '[
# We need a remaining filter since only two of the three values
# are covered by the zigzag join.
-opt expect=GenerateInvertedIndexZigzagJoins disable=GenerateMinimalInvertedIndexScans
+opt expect=GenerateInvertedIndexZigzagJoins
SELECT k FROM c WHERE a @> ARRAY[1,3,1,5]
----
project
@@ -10256,7 +9836,7 @@ project
└── a:2 @> ARRAY[1,3,1,5] [outer=(2), immutable, constraints=(/2: (/NULL - ])]
# Regression test for #95270. We should not need any remaining filter.
-opt expect=GenerateInvertedIndexZigzagJoins disable=GenerateMinimalInvertedIndexScans
+opt expect=GenerateInvertedIndexZigzagJoins
SELECT k FROM c WHERE a @> ARRAY[1,2]
----
project
@@ -10279,7 +9859,7 @@ project
└── filters (true)
# The first path can't be used for a zigzag join, but the second two can.
-opt expect=GenerateInvertedIndexZigzagJoins disable=GenerateMinimalInvertedIndexScans
+opt expect=GenerateInvertedIndexZigzagJoins
SELECT * FROM b WHERE j @> '{"a":{}, "b":2, "c":3}'
----
inner-join (lookup b)
@@ -10335,7 +9915,7 @@ select
└── (j:4 @> '[3]') OR (j:4 @> '[[1, 2]]') [outer=(4), immutable, constraints=(/4: (/NULL - ])]
# GenerateInvertedIndexZigzagJoins propagates row-level locking information.
-opt expect=GenerateInvertedIndexZigzagJoins disable=GenerateMinimalInvertedIndexScans
+opt expect=GenerateInvertedIndexZigzagJoins
SELECT * FROM b WHERE j @> '{"a":1, "c":2}' FOR UPDATE
----
inner-join (lookup b)
@@ -10364,23 +9944,26 @@ project
├── columns: k:1!null
├── immutable
├── key: (1)
- └── select
- ├── columns: k:1!null j:4!null
- ├── immutable
+ └── inverted-filter
+ ├── columns: k:1!null
+ ├── inverted expression: /9
+ │ ├── tight: true, unique: true
+ │ ├── union spans: empty
+ │ └── INTERSECTION
+ │ ├── span expression
+ │ │ ├── tight: true, unique: true
+ │ │ └── union spans: ["a"/"b", "a"/"b"]
+ │ └── span expression
+ │ ├── tight: true, unique: true
+ │ └── union spans: ["c"/"d", "c"/"d"]
├── key: (1)
- ├── fd: (1)-->(4)
- ├── index-join b
- │ ├── columns: k:1!null j:4
- │ ├── key: (1)
- │ ├── fd: (1)-->(4)
- │ └── scan b@j_inv_idx,inverted
- │ ├── columns: k:1!null
- │ ├── inverted constraint: /9/1
- │ │ └── spans: ["c"/"d", "c"/"d"]
- │ ├── flags: no-zigzag-join
- │ └── key: (1)
- └── filters
- └── j:4 @> '{"a": "b", "c": "d"}' [outer=(4), immutable, constraints=(/4: (/NULL - ])]
+ └── scan b@j_inv_idx,inverted
+ ├── columns: k:1!null j_inverted_key:9!null
+ ├── inverted constraint: /9/1
+ │ └── spans
+ │ ├── ["a"/"b", "a"/"b"]
+ │ └── ["c"/"d", "c"/"d"]
+ └── flags: no-zigzag-join
exec-ddl
CREATE TABLE inv_zz_partial (
diff --git a/pkg/sql/plan_opt.go b/pkg/sql/plan_opt.go
index ebac9192a34b..468786db010a 100644
--- a/pkg/sql/plan_opt.go
+++ b/pkg/sql/plan_opt.go
@@ -149,14 +149,7 @@ func (p *planner) prepareUsingOptimizer(
stmt.Prepared.StatementNoConstants = pm.StatementNoConstants
stmt.Prepared.Columns = pm.Columns
stmt.Prepared.Types = pm.Types
- if cachedData.Memo.IsOptimized() {
- // A cache, fully optimized memo is an "ideal generic
- // memo".
- stmt.Prepared.GenericMemo = cachedData.Memo
- stmt.Prepared.IdealGenericPlan = true
- } else {
- stmt.Prepared.BaseMemo = cachedData.Memo
- }
+ stmt.Prepared.BaseMemo = cachedData.Memo
return opc.flags, nil
}
opc.log(ctx, "query cache hit but memo is stale (prepare)")
@@ -170,7 +163,7 @@ func (p *planner) prepareUsingOptimizer(
}
// Build the memo. Do not attempt to build a generic plan at PREPARE-time.
- memo, _, err := opc.buildReusableMemo(ctx, false /* allowNonIdealGeneric */)
+ memo, _, err := opc.buildReusableMemo(ctx, false /* buildGeneric */)
if err != nil {
return 0, err
}
@@ -220,14 +213,7 @@ func (p *planner) prepareUsingOptimizer(
stmt.Prepared.Columns = resultCols
stmt.Prepared.Types = p.semaCtx.Placeholders.Types
if opc.allowMemoReuse {
- if memo.IsOptimized() {
- // A memo fully optimized at prepare time is an "ideal generic
- // memo".
- stmt.Prepared.GenericMemo = memo
- stmt.Prepared.IdealGenericPlan = true
- } else {
- stmt.Prepared.BaseMemo = memo
- }
+ stmt.Prepared.BaseMemo = memo
if opc.useCache {
// execPrepare sets the PrepareMetadata.InferredTypes field after this
// point. However, once the PrepareMetadata goes into the cache, it
@@ -433,13 +419,13 @@ const (
// 1. The statement does not contain placeholders nor fold-able stable
// operators.
// 2. Or, the placeholder fast path is used.
-// 3. Or, allowNonIdealGeneric is true and the plan is fully optimized as best
-// as possible in the presence of placeholders.
+// 3. Or, buildGeneric is true and the plan is fully optimized as best as
+// possible in the presence of placeholders.
//
// The returned memo is fully detached from the planner and can be used with
// reuseMemo independently and concurrently by multiple threads.
func (opc *optPlanningCtx) buildReusableMemo(
- ctx context.Context, allowNonIdealGeneric bool,
+ ctx context.Context, buildGeneric bool,
) (*memo.Memo, memoType, error) {
p := opc.p
@@ -520,7 +506,7 @@ func (opc *optPlanningCtx) buildReusableMemo(
opc.log(ctx, "placeholder fast path")
opc.flags.Set(planFlagOptimized)
return opc.optimizer.DetachMemo(ctx), memoTypeIdealGeneric, nil
- } else if allowNonIdealGeneric {
+ } else if buildGeneric {
// Build a generic query plan if the placeholder fast path failed and a
// generic plan was requested.
opc.log(ctx, "optimizing (generic)")
@@ -544,7 +530,9 @@ func (opc *optPlanningCtx) buildReusableMemo(
//
// The returned memo is only safe to use in one thread, during execution of the
// current statement.
-func (opc *optPlanningCtx) reuseMemo(cachedMemo *memo.Memo) (*memo.Memo, error) {
+func (opc *optPlanningCtx) reuseMemo(
+ ctx context.Context, cachedMemo *memo.Memo,
+) (*memo.Memo, error) {
opc.incPlanTypeTelemetry(cachedMemo)
if cachedMemo.IsOptimized() {
// The query could have been already fully optimized in
@@ -593,15 +581,11 @@ func (opc *optPlanningCtx) incPlanTypeTelemetry(cachedMemo *memo.Memo) {
// useGenericPlan returns true if a generic query plan should be used instead of
// a custom plan.
func (opc *optPlanningCtx) useGenericPlan() bool {
- prep := opc.p.stmt.Prepared
- // Always use an ideal generic plan.
- if prep.IdealGenericPlan {
- return true
- }
switch opc.p.SessionData().PlanCacheMode {
case sessiondatapb.PlanCacheModeForceGeneric:
return true
case sessiondatapb.PlanCacheModeAuto:
+ prep := opc.p.stmt.Prepared
// We need to build CustomPlanThreshold custom plans before considering
// a generic plan.
if prep.Costs.NumCustom() < CustomPlanThreshold {
@@ -625,7 +609,7 @@ func (opc *optPlanningCtx) useGenericPlan() bool {
// from, baseMemo or genericMemo. It returns nil if both memos are stale. It
// selects baseMemo or genericMemo based on the following rules, in order:
//
-// 1. If the generic memo is ideal, it is returned as-is.
+// 1. If baseMemo is fully optimized and not stale, it is returned as-is.
// 2. If plan_cache_mode=force_generic_plan is true then genericMemo is
// returned as-is if it is not stale.
// 3. If plan_cache_mode=auto, there have been at least 5 custom plans
@@ -638,37 +622,54 @@ func (opc *optPlanningCtx) useGenericPlan() bool {
// stale.
// 5. Otherwise, nil is returned and the caller is responsible for building a
// new memo.
-func (opc *optPlanningCtx) chooseValidPreparedMemo(ctx context.Context) (*memo.Memo, error) {
- prep := opc.p.stmt.Prepared
- if opc.useGenericPlan() {
- if prep.GenericMemo == nil {
- // A generic plan does not yet exist.
- return nil, nil
+//
+// The logic is structured to avoid unnecessary (*memo.Memo).IsStale calls,
+// since they can be expensive.
+func (opc *optPlanningCtx) chooseValidPreparedMemo(
+ ctx context.Context, baseMemo *memo.Memo, genericMemo *memo.Memo,
+) (*memo.Memo, error) {
+ // First check for a fully optimized, non-stale, base memo.
+ if baseMemo != nil && baseMemo.IsOptimized() {
+ isStale, err := baseMemo.IsStale(ctx, opc.p.EvalContext(), opc.catalog)
+ if err != nil {
+ return nil, err
+ } else if !isStale {
+ return baseMemo, nil
}
- isStale, err := prep.GenericMemo.IsStale(ctx, opc.p.EvalContext(), opc.catalog)
+ }
+
+ prep := opc.p.stmt.Prepared
+ reuseGeneric := opc.useGenericPlan()
+
+ // Next check for a non-stale, generic memo.
+ if reuseGeneric && genericMemo != nil {
+ isStale, err := genericMemo.IsStale(ctx, opc.p.EvalContext(), opc.catalog)
if err != nil {
return nil, err
} else if !isStale {
- return prep.GenericMemo, nil
+ return genericMemo, nil
+ } else {
+ // Clear the generic cost if the memo is stale. DDL or new stats
+ // could drastically change the cost of generic and custom plans, so
+ // we should re-consider which to use.
+ prep.Costs.ClearGeneric()
}
- // Clear the generic cost if the memo is stale. DDL or new stats
- // could drastically change the cost of generic and custom plans, so
- // we should re-consider which to use.
- prep.Costs.ClearGeneric()
- return nil, nil
}
- if prep.BaseMemo != nil {
- isStale, err := prep.BaseMemo.IsStale(ctx, opc.p.EvalContext(), opc.catalog)
+ // Next, check for a non-stale, normalized memo, if a generic memo should
+ // not be reused.
+ if !reuseGeneric && baseMemo != nil && !baseMemo.IsOptimized() {
+ isStale, err := baseMemo.IsStale(ctx, opc.p.EvalContext(), opc.catalog)
if err != nil {
return nil, err
} else if !isStale {
- return prep.BaseMemo, nil
+ return baseMemo, nil
+ } else {
+ // Clear the custom costs if the memo is stale. DDL or new stats
+ // could drastically change the cost of generic and custom plans, so
+ // we should re-consider which to use.
+ prep.Costs.ClearCustom()
}
- // Clear the custom costs if the memo is stale. DDL or new stats
- // could drastically change the cost of generic and custom plans, so
- // we should re-consider which to use.
- prep.Costs.ClearCustom()
}
// A valid memo was not found.
@@ -706,13 +707,13 @@ func (opc *optPlanningCtx) fetchPreparedMemo(ctx context.Context) (_ *memo.Memo,
// If the statement was previously prepared, check for a reusable memo.
// First check for a valid (non-stale) memo.
- validMemo, err := opc.chooseValidPreparedMemo(ctx)
+ validMemo, err := opc.chooseValidPreparedMemo(ctx, prep.BaseMemo, prep.GenericMemo)
if err != nil {
return nil, err
}
if validMemo != nil {
opc.log(ctx, "reusing cached memo")
- return opc.reuseMemo(validMemo)
+ return opc.reuseMemo(ctx, validMemo)
}
// Otherwise, we need to rebuild the memo.
@@ -726,34 +727,60 @@ func (opc *optPlanningCtx) fetchPreparedMemo(ctx context.Context) (_ *memo.Memo,
if err != nil {
return nil, err
}
- if opc.allowMemoReuse {
- switch typ {
- case memoTypeIdealGeneric:
- // An "ideal" generic memo will always be used regardless of
- // plan_cache_mode, so there is no need to set GenericCost.
- prep.GenericMemo = newMemo
- prep.IdealGenericPlan = true
- case memoTypeGeneric:
- prep.GenericMemo = newMemo
- prep.Costs.SetGeneric(newMemo.RootExpr().(memo.RelExpr).Cost())
- // Now that the cost of the generic plan is known, we need to
- // re-evaluate the decision to use a generic or custom plan.
- if !opc.useGenericPlan() {
- // The generic plan that we just built is too expensive, so we need
- // to build a custom plan. We recursively call fetchPreparedMemo in
- // case we have a custom plan that can be reused as a starting point
- // for optimization. The function should not recurse more than once.
- return opc.fetchPreparedMemo(ctx)
- }
- case memoTypeCustom:
- prep.BaseMemo = newMemo
- default:
- return nil, errors.AssertionFailedf("unexpected memo type %v", typ)
+ switch typ {
+ case memoTypeIdealGeneric:
+ // If we have an "ideal" generic memo, store it as a base memo. It will
+ // always be used regardless of plan_cache_mode, so there is no need to
+ // set GenericCost.
+ prep.BaseMemo = newMemo
+ case memoTypeGeneric:
+ prep.GenericMemo = newMemo
+ prep.Costs.SetGeneric(newMemo.RootExpr().(memo.RelExpr).Cost())
+ // Now that the cost of the generic plan is known, we need to
+ // re-evaluate the decision to use a generic or custom plan.
+ if !opc.useGenericPlan() {
+ // The generic plan that we just built is too expensive, so we need
+ // to build a custom plan. We recursively call fetchPreparedMemo in
+ // case we have a custom plan that can be reused as a starting point
+ // for optimization. The function should not recurse more than once.
+ return opc.fetchPreparedMemo(ctx)
}
+ case memoTypeCustom:
+ prep.BaseMemo = newMemo
+ default:
+ return nil, errors.AssertionFailedf("unexpected memo type %v", typ)
}
// Re-optimize the memo, if necessary.
- return opc.reuseMemo(newMemo)
+ return opc.reuseMemo(ctx, newMemo)
+}
+
+// fetchPreparedMemoLegacy attempts to fetch a prepared memo. If a valid (i.e.,
+// non-stale) memo is found, it is used. Otherwise, a new memo is built. If
+// memo reuse is not allowed, nil is returned.
+func (opc *optPlanningCtx) fetchPreparedMemoLegacy(ctx context.Context) (_ *memo.Memo, err error) {
+ prepared := opc.p.stmt.Prepared
+ p := opc.p
+ if opc.allowMemoReuse && prepared != nil && prepared.BaseMemo != nil {
+ // We are executing a previously prepared statement and a reusable memo is
+ // available.
+
+ // If the prepared memo has been invalidated by schema or other changes,
+ // re-prepare it.
+ if isStale, err := prepared.BaseMemo.IsStale(ctx, p.EvalContext(), opc.catalog); err != nil {
+ return nil, err
+ } else if isStale {
+ opc.log(ctx, "rebuilding cached memo")
+ prepared.BaseMemo, _, err = opc.buildReusableMemo(ctx, false /* buildGeneric */)
+ if err != nil {
+ return nil, err
+ }
+ }
+ opc.log(ctx, "reusing cached memo")
+ return opc.reuseMemo(ctx, prepared.BaseMemo)
+ }
+
+ return nil, nil
}
// buildExecMemo creates a fully optimized memo, possibly reusing a previously
@@ -767,19 +794,32 @@ func (opc *optPlanningCtx) buildExecMemo(ctx context.Context) (_ *memo.Memo, _ e
// rollback its transaction. Use resumeProc to resume execution in a new
// transaction where the control statement left off.
opc.log(ctx, "resuming stored procedure execution in a new transaction")
- return opc.reuseMemo(resumeProc)
+ return opc.reuseMemo(ctx, resumeProc)
}
- // Fetch and reuse a memo if a valid one is available.
- m, err := opc.fetchPreparedMemo(ctx)
- if err != nil {
- return nil, err
- }
- if m != nil {
- return m, nil
+ p := opc.p
+ if p.SessionData().PlanCacheMode == sessiondatapb.PlanCacheModeForceCustom {
+		// Fall back to the legacy logic for reusing memos if plan_cache_mode is
+		// set to force_custom_plan.
+ m, err := opc.fetchPreparedMemoLegacy(ctx)
+ if err != nil {
+ return nil, err
+ }
+ if m != nil {
+ return m, nil
+ }
+ } else {
+ // Use new logic for reusing memos if plan_cache_mode is set to
+ // force_generic_plan or auto.
+ m, err := opc.fetchPreparedMemo(ctx)
+ if err != nil {
+ return nil, err
+ }
+ if m != nil {
+ return m, nil
+ }
}
- p := opc.p
if opc.useCache {
// Consult the query cache.
cachedData, ok := p.execCfg.QueryCache.Find(&p.queryCacheSession, opc.p.stmt.SQL)
@@ -788,7 +828,7 @@ func (opc *optPlanningCtx) buildExecMemo(ctx context.Context) (_ *memo.Memo, _ e
return nil, err
} else if isStale {
opc.log(ctx, "query cache hit but needed update")
- cachedData.Memo, _, err = opc.buildReusableMemo(ctx, false /* allowNonIdealGeneric */)
+ cachedData.Memo, _, err = opc.buildReusableMemo(ctx, false /* buildGeneric */)
if err != nil {
return nil, err
}
@@ -801,7 +841,7 @@ func (opc *optPlanningCtx) buildExecMemo(ctx context.Context) (_ *memo.Memo, _ e
opc.log(ctx, "query cache hit")
opc.flags.Set(planFlagOptCacheHit)
}
- return opc.reuseMemo(cachedData.Memo)
+ return opc.reuseMemo(ctx, cachedData.Memo)
}
opc.flags.Set(planFlagOptCacheMiss)
opc.log(ctx, "query cache miss")
diff --git a/pkg/sql/prepared_stmt.go b/pkg/sql/prepared_stmt.go
index 028a9c707d01..94216d3d7868 100644
--- a/pkg/sql/prepared_stmt.go
+++ b/pkg/sql/prepared_stmt.go
@@ -52,18 +52,23 @@ type PreparedStatement struct {
// BaseMemo is the memoized data structure constructed by the cost-based
// optimizer during prepare of a SQL statement.
+ //
+ // It may be a fully-optimized memo if it contains an "ideal generic plan"
+ // that is guaranteed to be optimal across all executions of the prepared
+ // statement. Ideal generic plans are generated when the statement has no
+ // placeholders nor fold-able stable expressions, or when the placeholder
+ // fast-path is utilized.
+ //
+ // If it is not an ideal generic plan, it is an unoptimized, normalized
+ // memo that is used as a starting point for optimization of custom plans.
BaseMemo *memo.Memo
// GenericMemo, if present, is a fully-optimized memo that can be executed
// as-is.
+ // TODO(mgartner): Put all fully-optimized plans in the GenericMemo field to
+ // reduce confusion.
GenericMemo *memo.Memo
- // IdealGenericPlan is true if GenericMemo is guaranteed to be optimal
- // across all executions of the prepared statement. Ideal generic plans are
- // generated when the statement has no placeholders nor fold-able stable
- // expressions, or when the placeholder fast-path is utilized.
- IdealGenericPlan bool
-
// Costs tracks the costs of previously optimized custom and generic plans.
Costs planCosts
diff --git a/pkg/sql/reassign_owned_by.go b/pkg/sql/reassign_owned_by.go
index 89e1d0d14cd0..c00c63e47618 100644
--- a/pkg/sql/reassign_owned_by.go
+++ b/pkg/sql/reassign_owned_by.go
@@ -8,6 +8,7 @@ package sql
import (
"context"
+ "github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/security/username"
"github.com/cockroachdb/cockroach/pkg/server/telemetry"
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
@@ -19,7 +20,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/decodeusername"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
- "github.com/cockroachdb/cockroach/pkg/sql/sem/catconstants"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry"
"github.com/cockroachdb/errors"
@@ -120,26 +120,27 @@ func (n *reassignOwnedByNode) startExec(params runParams) error {
for _, oldRole := range n.normalizedOldRoles {
// There should only be one database (current).
for _, dbID := range lCtx.dbIDs {
- isOwner, err := isOwner(params.ctx, params.p, lCtx.dbDescs[dbID], oldRole)
+ dbDesc := lCtx.dbDescs[dbID]
+ owner, err := params.p.getOwnerOfPrivilegeObject(params.ctx, dbDesc)
if err != nil {
return err
}
- if isOwner {
- if err := n.reassignDatabaseOwner(lCtx.dbDescs[dbID], params); err != nil {
+ if owner == oldRole {
+ if err := n.reassignDatabaseOwner(dbDesc, params); err != nil {
return err
}
}
}
for _, schemaID := range lCtx.schemaIDs {
- isOwner, err := isOwner(params.ctx, params.p, lCtx.schemaDescs[schemaID], oldRole)
+ schemaDesc := lCtx.schemaDescs[schemaID]
+ owner, err := params.p.getOwnerOfPrivilegeObject(params.ctx, schemaDesc)
if err != nil {
return err
}
- if isOwner {
- // Don't reassign public schema.
- // TODO(richardjcai): revisit this in 22.2, in 22.1 we do not allow
- // modifying the public schema.
- if lCtx.schemaDescs[schemaID].GetName() == catconstants.PublicSchemaName {
+ if owner == oldRole {
+ // Don't reassign the descriptorless public schema for the system
+ // database.
+ if schemaID == keys.SystemPublicSchemaID {
continue
}
if err := n.reassignSchemaOwner(lCtx.schemaDescs[schemaID], currentDbDesc, params); err != nil {
@@ -149,33 +150,36 @@ func (n *reassignOwnedByNode) startExec(params runParams) error {
}
for _, tbID := range lCtx.tbIDs {
- isOwner, err := isOwner(params.ctx, params.p, lCtx.tbDescs[tbID], oldRole)
+ tbDesc := lCtx.tbDescs[tbID]
+ owner, err := params.p.getOwnerOfPrivilegeObject(params.ctx, tbDesc)
if err != nil {
return err
}
- if isOwner {
+ if owner == oldRole {
if err := n.reassignTableOwner(lCtx.tbDescs[tbID], params); err != nil {
return err
}
}
}
for _, typID := range lCtx.typIDs {
- isOwner, err := isOwner(params.ctx, params.p, lCtx.typDescs[typID], oldRole)
+ typDesc := lCtx.typDescs[typID]
+ owner, err := params.p.getOwnerOfPrivilegeObject(params.ctx, typDesc)
if err != nil {
return err
}
- if isOwner && (lCtx.typDescs[typID].AsAliasTypeDescriptor() == nil) {
+ if owner == oldRole && (lCtx.typDescs[typID].AsAliasTypeDescriptor() == nil) {
if err := n.reassignTypeOwner(lCtx.typDescs[typID].(catalog.NonAliasTypeDescriptor), params); err != nil {
return err
}
}
}
for _, fnID := range lCtx.fnIDs {
- isOwner, err := isOwner(params.ctx, params.p, lCtx.fnDescs[fnID], oldRole)
+ fnDesc := lCtx.fnDescs[fnID]
+ owner, err := params.p.getOwnerOfPrivilegeObject(params.ctx, fnDesc)
if err != nil {
return err
}
- if isOwner {
+ if owner == oldRole {
if err := n.reassignFunctionOwner(lCtx.fnDescs[fnID], params); err != nil {
return err
}
diff --git a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/helpers.go b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/helpers.go
index 3117eba70f95..5246a968811e 100644
--- a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/helpers.go
+++ b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/helpers.go
@@ -991,12 +991,24 @@ func panicIfSchemaChangeIsDisallowed(tableElements ElementResultSet, n tree.Stat
}
_, _, ldrJobIDs := scpb.FindLDRJobIDs(tableElements)
- if ldrJobIDs != nil && len(ldrJobIDs.JobIDs) > 0 && !tree.IsAllowedLDRSchemaChange(n) {
- _, _, ns := scpb.FindNamespace(tableElements)
- if ns == nil {
- panic(errors.AssertionFailedf("programming error: Namespace element not found"))
+ if ldrJobIDs != nil && len(ldrJobIDs.JobIDs) > 0 {
+ var virtualColNames []string
+ scpb.ForEachColumnType(tableElements, func(current scpb.Status, target scpb.TargetStatus, colTypeElem *scpb.ColumnType) {
+ if !colTypeElem.IsVirtual {
+ return
+ }
+ col := tableElements.FilterColumnName().Filter(func(current scpb.Status, target scpb.TargetStatus, colNameElem *scpb.ColumnName) bool {
+ return colNameElem.ColumnID == colTypeElem.ColumnID && target == scpb.ToPublic
+ }).MustGetOneElement()
+ virtualColNames = append(virtualColNames, col.Name)
+ })
+ if !tree.IsAllowedLDRSchemaChange(n, virtualColNames) {
+ _, _, ns := scpb.FindNamespace(tableElements)
+ if ns == nil {
+ panic(errors.AssertionFailedf("programming error: Namespace element not found"))
+ }
+ panic(sqlerrors.NewDisallowedSchemaChangeOnLDRTableErr(ns.Name, ldrJobIDs.JobIDs))
}
- panic(sqlerrors.NewDisallowedSchemaChangeOnLDRTableErr(ns.Name, ldrJobIDs.JobIDs))
}
}
diff --git a/pkg/sql/sem/tree/schema_helpers.go b/pkg/sql/sem/tree/schema_helpers.go
index a7e4b55f852d..e78a36e376b9 100644
--- a/pkg/sql/sem/tree/schema_helpers.go
+++ b/pkg/sql/sem/tree/schema_helpers.go
@@ -32,14 +32,51 @@ func IsSetOrResetSchemaLocked(n Statement) bool {
// IsAllowedLDRSchemaChange returns true if the schema change statement is
// allowed to occur while the table is being referenced by a logical data
// replication job as a destination table.
-func IsAllowedLDRSchemaChange(n Statement) bool {
+func IsAllowedLDRSchemaChange(n Statement, virtualColNames []string) bool {
switch s := n.(type) {
case *CreateIndex:
- // Only allow non-unique and non-partial indexes to be created. A unique or
- // partial index on a destination table could cause inserts to fail.
- return !s.Unique && s.Predicate == nil
+ // Don't allow creating an index on a virtual column.
+ for _, col := range s.Columns {
+ if slices.Contains(virtualColNames, string(col.Column)) {
+ return false
+ }
+ }
+ // Disallow unique, partial, or hash-sharded indexes. Having these indexes
+ // on a destination table could cause inserts to fail.
+	// NB: hash-sharded indexes are disallowed because they index a virtual
+	// column. Since that virtual column is created implicitly as part of the
+	// same statement, the check on virtualColNames above would not catch it.
+ return !s.Unique && s.Predicate == nil && s.Sharded == nil
case *DropIndex:
return true
+ case *SetZoneConfig:
+ return true
+ case *AlterTable:
+ onlySafeStorageParams := true
+ for _, cmd := range s.Cmds {
+ switch c := cmd.(type) {
+ // Allow safe storage parameter changes.
+ case *AlterTableSetStorageParams:
+ // ttl_expire_after is not safe since it creates a new column and
+ // backfills it.
+ if c.StorageParams.GetVal("ttl_expire_after") != nil {
+ onlySafeStorageParams = false
+ }
+ case *AlterTableResetStorageParams:
+ if slices.Contains(c.Params, "ttl_expire_after") {
+ // Resetting `ttl_expire_after` is not safe since it drops a column
+ // and rebuilds the primary index.
+ onlySafeStorageParams = false
+ } else if slices.Contains(c.Params, "ttl") {
+ // Resetting `ttl` can also result in the expiration column being
+ // dropped.
+ onlySafeStorageParams = false
+ }
+ default:
+ onlySafeStorageParams = false
+ }
+ }
+ return onlySafeStorageParams
}
return false
}
diff --git a/pkg/sql/sem/tree/schema_helpers_test.go b/pkg/sql/sem/tree/schema_helpers_test.go
index c300979266ea..befdd0c65ac9 100644
--- a/pkg/sql/sem/tree/schema_helpers_test.go
+++ b/pkg/sql/sem/tree/schema_helpers_test.go
@@ -58,13 +58,31 @@ func TestIsAllowedLDRSchemaChange(t *testing.T) {
stmt: "ALTER TABLE t ADD COLUMN a INT, DROP COLUMN b",
isAllowed: false,
},
+ {
+ stmt: "ALTER TABLE t ADD COLUMN a INT, SET (ttl = 'on', ttl_expiration_expression = 'expires_at')",
+ isAllowed: false,
+ },
+ {
+ stmt: "ALTER TABLE t SET (ttl = 'on', ttl_expire_after = '5m')",
+ isAllowed: false,
+ },
+ {
+ stmt: "ALTER TABLE t SET (ttl = 'on', ttl_expiration_expression = 'expires_at')",
+ isAllowed: true,
+ },
+ {
+ stmt: "ALTER TABLE t RESET (ttl, ttl_expiration_expression)",
+ isAllowed: false,
+ },
} {
t.Run(tc.stmt, func(t *testing.T) {
stmt, err := parser.ParseOne(tc.stmt)
if err != nil {
t.Fatal(err)
}
- if got := tree.IsAllowedLDRSchemaChange(stmt.AST); got != tc.isAllowed {
+ // Tests for virtual column checks are in
+ // TestLogicalReplicationCreationChecks.
+ if got := tree.IsAllowedLDRSchemaChange(stmt.AST, nil /* virtualColNames */); got != tc.isAllowed {
t.Errorf("expected %v, got %v", tc.isAllowed, got)
}
})
diff --git a/pkg/storage/pebble.go b/pkg/storage/pebble.go
index 936c03a0862c..3e670fef751a 100644
--- a/pkg/storage/pebble.go
+++ b/pkg/storage/pebble.go
@@ -406,10 +406,10 @@ func ShouldUseEFOS(settings *settings.Values) bool {
return UseEFOS.Get(settings) || UseExciseForSnapshots.Get(settings)
}
-// EngineSuffixCompare implements pebble.Comparer.CompareSuffixes. It compares
+// EngineRangeSuffixCompare implements pebble.Comparer.CompareRangeSuffixes. It compares
// cockroach suffixes (which are composed of the version and a trailing sentinel
// byte); the version can be an MVCC timestamp or a lock key.
-func EngineSuffixCompare(a, b []byte) int {
+func EngineRangeSuffixCompare(a, b []byte) int {
if len(a) == 0 || len(b) == 0 {
// Empty suffixes sort before non-empty suffixes.
return cmp.Compare(len(a), len(b))
@@ -617,10 +617,12 @@ func normalizeEngineSuffixForCompare(a []byte) []byte {
// EngineComparer is a pebble.Comparer object that implements MVCC-specific
// comparator settings for use with Pebble.
var EngineComparer = &pebble.Comparer{
- Split: EngineKeySplit,
- CompareSuffixes: EngineSuffixCompare,
- Compare: EngineKeyCompare,
- Equal: EngineKeyEqual,
+ Split: EngineKeySplit,
+ CompareRangeSuffixes: EngineRangeSuffixCompare,
+ ComparePointSuffixes: EnginePointSuffixCompare,
+
+ Compare: EngineKeyCompare,
+ Equal: EngineKeyEqual,
AbbreviatedKey: func(k []byte) uint64 {
key, ok := GetKeyPartFromEngineKey(k)
@@ -840,7 +842,7 @@ func DefaultPebbleOptions() *pebble.Options {
Comparer: EngineComparer,
FS: vfs.Default,
KeySchema: keySchema.Name,
- KeySchemas: sstable.MakeKeySchemas(keySchema),
+ KeySchemas: sstable.MakeKeySchemas(&keySchema),
// A value of 2 triggers a compaction when there is 1 sub-level.
L0CompactionThreshold: 2,
L0StopWritesThreshold: 1000,
@@ -1238,6 +1240,10 @@ func newPebble(ctx context.Context, cfg engineConfig) (p *Pebble, err error) {
return IngestSplitEnabled.Get(&cfg.settings.SV)
}
cfg.opts.Experimental.EnableColumnarBlocks = func() bool {
+ // TODO(radu): disable completely for now since the format is not finalized.
+ if true {
+ return false
+ }
return columnarBlocksEnabled.Get(&cfg.settings.SV)
}
cfg.opts.Experimental.EnableDeleteOnlyCompactionExcises = func() bool {
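
The EngineComparer hunk above splits suffix comparison into CompareRangeSuffixes and ComparePointSuffixes. A minimal sketch of that pebble.Comparer shape, assuming the func(a, b []byte) int field signatures used above and a toy suffix format (not CockroachDB's version encoding, which TestEngineComparer exercises later in this patch): the point comparator may treat trailing zero padding as insignificant, while the range comparator keeps a strict byte-wise order:

    package main

    import (
        "bytes"
        "fmt"

        "github.com/cockroachdb/pebble"
    )

    // Toy comparer: suffixes are raw bytes and a trailing 0x00 is mere padding.
    var toyComparer = &pebble.Comparer{
        Compare: bytes.Compare,
        Equal:   bytes.Equal,
        // Point suffixes: padding is ignored, so {2, 0} compares equal to {2}.
        ComparePointSuffixes: func(a, b []byte) int {
            return bytes.Compare(bytes.TrimRight(a, "\x00"), bytes.TrimRight(b, "\x00"))
        },
        // Range suffixes: strict byte-wise order, padding is significant.
        CompareRangeSuffixes: bytes.Compare,
    }

    func main() {
        fmt.Println(toyComparer.ComparePointSuffixes([]byte{2, 0}, []byte{2})) // 0
        fmt.Println(toyComparer.CompareRangeSuffixes([]byte{2, 0}, []byte{2})) // 1
    }
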
diff --git a/pkg/storage/pebble_key_schema.go b/pkg/storage/pebble_key_schema.go
index 8c80008e4139..763a3d9e6bb0 100644
--- a/pkg/storage/pebble_key_schema.go
+++ b/pkg/storage/pebble_key_schema.go
@@ -11,7 +11,6 @@ import (
"encoding/binary"
"fmt"
"io"
- "sync"
"unsafe"
"github.com/cockroachdb/cockroach/pkg/util/buildutil"
@@ -47,15 +46,14 @@ var keySchema = colblk.KeySchema{
cockroachColUntypedVersion: colblk.DataTypeBytes,
},
NewKeyWriter: func() colblk.KeyWriter {
- kw := &cockroachKeyWriter{}
- kw.roachKeys.Init(16)
- kw.wallTimes.Init()
- kw.logicalTimes.InitWithDefault()
- kw.untypedVersions.Init()
- return kw
+ return makeCockroachKeyWriter()
},
- NewKeySeeker: func() colblk.KeySeeker {
- return &cockroachKeySeeker{}
+ InitKeySeekerMetadata: func(meta *colblk.KeySeekerMetadata, d *colblk.DataBlockDecoder) {
+ // The seeker state lives directly in the fixed-size metadata buffer; the
+ // size assertion on cockroachKeySeeker below guarantees it fits.
+ ks := (*cockroachKeySeeker)(unsafe.Pointer(meta))
+ ks.init(d)
+ },
+ KeySeeker: func(meta *colblk.KeySeekerMetadata) colblk.KeySeeker {
+ return (*cockroachKeySeeker)(unsafe.Pointer(meta))
},
}
@@ -70,6 +68,15 @@ type cockroachKeyWriter struct {
// Assert *cockroachKeyWriter implements colblk.KeyWriter.
var _ colblk.KeyWriter = (*cockroachKeyWriter)(nil)
+func makeCockroachKeyWriter() *cockroachKeyWriter {
+ kw := &cockroachKeyWriter{}
+ kw.roachKeys.Init(16)
+ kw.wallTimes.Init()
+ kw.logicalTimes.InitWithDefault()
+ kw.untypedVersions.Init()
+ return kw
+}
+
func (kw *cockroachKeyWriter) ComparePrev(key []byte) colblk.KeyComparison {
var cmpv colblk.KeyComparison
cmpv.PrefixLen = int32(EngineKeySplit(key)) // TODO(jackson): Inline
@@ -223,9 +230,7 @@ func (kw *cockroachKeyWriter) Finish(
}
}
-var cockroachKeySeekerPool = sync.Pool{
- New: func() interface{} { return &cockroachKeySeeker{} },
-}
+// FinishHeader is a no-op: this key schema writes nothing into the header buffer.
+func (kw *cockroachKeyWriter) FinishHeader(buf []byte) {}
type cockroachKeySeeker struct {
roachKeys colblk.PrefixBytes
@@ -235,17 +240,18 @@ type cockroachKeySeeker struct {
untypedVersions colblk.RawBytes
}
+// Assert that the cockroachKeySeeker fits inside KeySeekerMetadata.
+var _ uint = colblk.KeySeekerMetadataSize - uint(unsafe.Sizeof(cockroachKeySeeker{}))
+
var _ colblk.KeySeeker = (*cockroachKeySeeker)(nil)
-// Init is part of the KeySeeker interface.
-func (ks *cockroachKeySeeker) Init(d *colblk.DataBlockDecoder) error {
+func (ks *cockroachKeySeeker) init(d *colblk.DataBlockDecoder) {
bd := d.BlockDecoder()
ks.roachKeys = bd.PrefixBytes(cockroachColRoachKey)
ks.roachKeyChanged = d.PrefixChanged()
ks.mvccWallTimes = bd.Uints(cockroachColMVCCWallTime)
ks.mvccLogical = bd.Uints(cockroachColMVCCLogical)
ks.untypedVersions = bd.RawBytes(cockroachColUntypedVersion)
- return nil
}
// IsLowerBound compares the provided key to the first user key
@@ -460,11 +466,5 @@ func (ks *cockroachKeySeeker) MaterializeUserKeyWithSyntheticSuffix(
return res
}
-// Release is part of the KeySeeker interface.
-func (ks *cockroachKeySeeker) Release() {
- *ks = cockroachKeySeeker{}
- cockroachKeySeekerPool.Put(ks)
-}
-
//go:linkname memmove runtime.memmove
func memmove(to, from unsafe.Pointer, n uintptr)
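
The seeker refactor above drops the sync.Pool: seeker state is now laid out directly inside the caller-provided, fixed-size KeySeekerMetadata buffer, and the `var _ uint = ...` line is a compile-time check that it fits. A self-contained sketch of both tricks with hypothetical stand-in names and sizes (not the colblk types):

    package main

    import (
        "fmt"
        "unsafe"
    )

    // metadataSize stands in for colblk.KeySeekerMetadataSize.
    const metadataSize = 64

    // seeker stands in for cockroachKeySeeker: per-block state that must fit
    // inside the metadata buffer.
    type seeker struct {
        a, b, c uint64
    }

    // Compile-time fit check: if unsafe.Sizeof(seeker{}) ever exceeds
    // metadataSize, this constant expression becomes negative, no longer
    // converts to uint, and the build fails.
    var _ uint = metadataSize - uint(unsafe.Sizeof(seeker{}))

    func main() {
        // Buffer-backed initialization, mirroring InitKeySeekerMetadata:
        // reinterpret an aligned, fixed-size buffer as the seeker instead of
        // allocating or pooling one.
        var meta [metadataSize / 8]uint64
        s := (*seeker)(unsafe.Pointer(&meta[0]))
        s.a, s.b, s.c = 1, 2, 3
        fmt.Println(s.a + s.b + s.c) // 6
    }
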
diff --git a/pkg/storage/pebble_key_schema_test.go b/pkg/storage/pebble_key_schema_test.go
index 57c917d3a801..eebec84965a3 100644
--- a/pkg/storage/pebble_key_schema_test.go
+++ b/pkg/storage/pebble_key_schema_test.go
@@ -13,6 +13,7 @@ import (
"strconv"
"strings"
"testing"
+ "unsafe"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock"
"github.com/cockroachdb/cockroach/pkg/testutils/datapathutils"
@@ -121,18 +122,12 @@ func TestKeySchema_KeySeeker(t *testing.T) {
var dec colblk.DataBlockDecoder
var ks colblk.KeySeeker
var maxKeyLen int
- enc.Init(keySchema)
+ enc.Init(&keySchema)
initKeySeeker := func() {
- if ks == nil || rand.Intn(2) == 1 {
- if ks != nil {
- ks.Release()
- }
- ks = keySchema.NewKeySeeker()
- }
- if err := ks.Init(&dec); err != nil {
- t.Fatal(err)
- }
+ ksPointer := &cockroachKeySeeker{}
+ keySchema.InitKeySeekerMetadata((*colblk.KeySeekerMetadata)(unsafe.Pointer(ksPointer)), &dec)
+ ks = keySchema.KeySeeker((*colblk.KeySeekerMetadata)(unsafe.Pointer(ksPointer)))
}
datadriven.RunTest(t, datapathutils.TestDataPath(t, "key_schema_key_seeker"), func(t *testing.T, td *datadriven.TestData) string {
@@ -158,7 +153,7 @@ func TestKeySchema_KeySeeker(t *testing.T) {
rows++
}
blk, _ := enc.Finish(rows, enc.Size())
- dec.Init(keySchema, blk)
+ dec.Init(&keySchema, blk)
return buf.String()
case "is-lower-bound":
initKeySeeker()
diff --git a/pkg/storage/pebble_test.go b/pkg/storage/pebble_test.go
index 00a3ccf99491..2d50bbbe127b 100644
--- a/pkg/storage/pebble_test.go
+++ b/pkg/storage/pebble_test.go
@@ -92,10 +92,33 @@ func TestEngineComparer(t *testing.T) {
ts3a := appendBytesToTimestamp(ts3, zeroLogical[:])
ts3b := appendBytesToTimestamp(ts3, slices.Concat(zeroLogical[:], syntheticBit))
- // We group versions by equality and in the expected ordering.
- orderedVersions := []any{
+ // We group versions by equality; the groups are in the expected point key order.
+ orderedVersions := [][]any{
+ {ts1}, // Empty version sorts first.
+ {ts2a, ts2},
+ {ts3b, ts3a, ts3},
+ {ts4},
+ {ts5},
+ }
+
+ // Compare point suffixes.
+ for i := range orderedVersions {
+ for j := range orderedVersions {
+ for _, v1 := range orderedVersions[i] {
+ for _, v2 := range orderedVersions[j] {
+ result := EngineComparer.ComparePointSuffixes(encodeVersion(v1), encodeVersion(v2))
+ if expected := cmp.Compare(i, j); result != expected {
+ t.Fatalf("ComparePointSuffixes(%x, %x) = %d, expected %d", v1, v2, result, expected)
+ }
+ }
+ }
+ }
+ }
+
+ // CompareRangeSuffixes imposes a stricter ordering.
+ rangeOrderedVersions := []any{
ts1, // Empty version sorts first.
- ts2a, // Synthetic bit is not ignored when comparing suffixes.
+ ts2a, // Synthetic bit is not ignored when comparing range suffixes.
ts2,
ts3b, // Higher timestamps sort before lower timestamps.
ts3a,
@@ -104,10 +127,10 @@ func TestEngineComparer(t *testing.T) {
ts5,
}
- // Compare suffixes.
- for i, v1 := range orderedVersions {
- for j, v2 := range orderedVersions {
- result := EngineComparer.CompareSuffixes(encodeVersion(v1), encodeVersion(v2))
+ // Compare range suffixes.
+ for i, v1 := range rangeOrderedVersions {
+ for j, v2 := range rangeOrderedVersions {
+ result := EngineComparer.CompareRangeSuffixes(encodeVersion(v1), encodeVersion(v2))
if expected := cmp.Compare(i, j); result != expected {
t.Fatalf("CompareSuffixes(%x, %x) = %d, expected %d", v1, v2, result, expected)
}
@@ -116,10 +139,15 @@ func TestEngineComparer(t *testing.T) {
lock1 := bytes.Repeat([]byte{1}, engineKeyVersionLockTableLen)
lock2 := bytes.Repeat([]byte{2}, engineKeyVersionLockTableLen)
- require.Equal(t, 0, EngineComparer.CompareSuffixes(encodeVersion(lock1), encodeVersion(lock1)))
- require.Equal(t, 0, EngineComparer.CompareSuffixes(encodeVersion(lock2), encodeVersion(lock2)))
- require.Equal(t, +1, EngineComparer.CompareSuffixes(encodeVersion(lock1), encodeVersion(lock2)))
- require.Equal(t, -1, EngineComparer.CompareSuffixes(encodeVersion(lock2), encodeVersion(lock1)))
+ require.Equal(t, 0, EngineComparer.CompareRangeSuffixes(encodeVersion(lock1), encodeVersion(lock1)))
+ require.Equal(t, 0, EngineComparer.CompareRangeSuffixes(encodeVersion(lock2), encodeVersion(lock2)))
+ require.Equal(t, +1, EngineComparer.CompareRangeSuffixes(encodeVersion(lock1), encodeVersion(lock2)))
+ require.Equal(t, -1, EngineComparer.CompareRangeSuffixes(encodeVersion(lock2), encodeVersion(lock1)))
+
+ require.Equal(t, 0, EngineComparer.ComparePointSuffixes(encodeVersion(lock1), encodeVersion(lock1)))
+ require.Equal(t, 0, EngineComparer.ComparePointSuffixes(encodeVersion(lock2), encodeVersion(lock2)))
+ require.Equal(t, +1, EngineComparer.ComparePointSuffixes(encodeVersion(lock1), encodeVersion(lock2)))
+ require.Equal(t, -1, EngineComparer.ComparePointSuffixes(encodeVersion(lock2), encodeVersion(lock1)))
keys := []roachpb.Key{
roachpb.Key(""),
@@ -128,7 +156,7 @@ func TestEngineComparer(t *testing.T) {
roachpb.Key("fg"),
}
- // We group keys by equality and in the expected ordering.
+ // We group keys by equality and the groups are in the expected order.
var orderedKeys [][][]byte
for _, k := range keys {
orderedKeys = append(orderedKeys,
diff --git a/pkg/testutils/lint/passes/fmtsafe/functions.go b/pkg/testutils/lint/passes/fmtsafe/functions.go
index 87c6314f597f..4890f40faecb 100644
--- a/pkg/testutils/lint/passes/fmtsafe/functions.go
+++ b/pkg/testutils/lint/passes/fmtsafe/functions.go
@@ -120,8 +120,6 @@ var requireConstFmt = map[string]bool{
"(*github.com/cockroachdb/cockroach/pkg/kv/kvserver.raftLogger).Fatalf": true,
"(*github.com/cockroachdb/cockroach/pkg/kv/kvserver.raftLogger).Panicf": true,
- "(*github.com/cockroachdb/cockroach/pkg/kv/kvserver/rafttrace.traceValue).logf": true,
-
"(*github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvflowcontrol/rac2.LogTracker).errorf": true,
"(github.com/cockroachdb/cockroach/pkg/raft/raftlogger.Logger).Debugf": true,
diff --git a/pkg/testutils/lint/passes/redactcheck/redactcheck.go b/pkg/testutils/lint/passes/redactcheck/redactcheck.go
index 00e764fdeb6e..025db3047678 100644
--- a/pkg/testutils/lint/passes/redactcheck/redactcheck.go
+++ b/pkg/testutils/lint/passes/redactcheck/redactcheck.go
@@ -138,12 +138,8 @@ func runAnalyzer(pass *analysis.Pass) (interface{}, error) {
"ID": {},
},
"github.com/cockroachdb/cockroach/pkg/raft/raftpb": {
- "Epoch": {},
- "PeerID": {},
- "MessageType": {},
- "EntryType": {},
- "ConfChangeType": {},
- "ConfChangeTransition": {},
+ "Epoch": {},
+ "PeerID": {},
},
"github.com/cockroachdb/cockroach/pkg/repstream/streampb": {
"StreamID": {},
@@ -229,10 +225,6 @@ func runAnalyzer(pass *analysis.Pass) (interface{}, error) {
"WorkKind": {},
"QueueKind": {},
},
- "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb": {
- "TraceID": {},
- "SpanID": {},
- },
"github.com/cockroachdb/cockroach/pkg/util/hlc": {
"ClockTimestamp": {},
"LegacyTimestamp": {},
diff --git a/pkg/testutils/release/cockroach_releases.yaml b/pkg/testutils/release/cockroach_releases.yaml
index c6a55d32e95f..0c5378376fb4 100644
--- a/pkg/testutils/release/cockroach_releases.yaml
+++ b/pkg/testutils/release/cockroach_releases.yaml
@@ -19,13 +19,13 @@
- 23.1.0
predecessor: "22.2"
"23.2":
- latest: 23.2.12
+ latest: 23.2.13
predecessor: "23.1"
"24.1":
- latest: 24.1.5
+ latest: 24.1.6
predecessor: "23.2"
"24.2":
- latest: 24.2.3
+ latest: 24.2.4
withdrawn:
- 24.2.1
predecessor: "24.1"
diff --git a/pkg/ui/workspaces/cluster-ui/src/api/databaseDetailsApi.ts b/pkg/ui/workspaces/cluster-ui/src/api/databaseDetailsApi.ts
index 756ee115ca11..1448bfc514db 100644
--- a/pkg/ui/workspaces/cluster-ui/src/api/databaseDetailsApi.ts
+++ b/pkg/ui/workspaces/cluster-ui/src/api/databaseDetailsApi.ts
@@ -83,7 +83,6 @@ function newDatabaseDetailsSpanStatsResponse(): DatabaseDetailsSpanStatsResponse
approximate_disk_bytes: 0,
live_bytes: 0,
total_bytes: 0,
- range_count: 0,
},
error: undefined,
};
@@ -332,7 +331,6 @@ export type DatabaseSpanStatsRow = {
approximate_disk_bytes: number;
live_bytes: number;
total_bytes: number;
- range_count: number;
};
function formatSpanStatsExecutionResult(
@@ -357,7 +355,6 @@ function formatSpanStatsExecutionResult(
if (txnResult.rows.length === 1) {
const row = txnResult.rows[0];
out.spanStats.approximate_disk_bytes = row.approximate_disk_bytes;
- out.spanStats.range_count = row.range_count;
out.spanStats.live_bytes = row.live_bytes;
out.spanStats.total_bytes = row.total_bytes;
} else {
@@ -511,7 +508,6 @@ export function createDatabaseDetailsSpanStatsReq(
): SqlExecutionRequest {
const statement = {
sql: `SELECT
- sum(range_count) as range_count,
sum(approximate_disk_bytes) as approximate_disk_bytes,
sum(live_bytes) as live_bytes,
sum(total_bytes) as total_bytes
diff --git a/pkg/ui/workspaces/cluster-ui/src/databasesPage/databasesPage.tsx b/pkg/ui/workspaces/cluster-ui/src/databasesPage/databasesPage.tsx
index ed446a1ae0e8..96aae47eaee6 100644
--- a/pkg/ui/workspaces/cluster-ui/src/databasesPage/databasesPage.tsx
+++ b/pkg/ui/workspaces/cluster-ui/src/databasesPage/databasesPage.tsx
@@ -571,29 +571,6 @@ export class DatabasesPage extends React.Component<
className: cx("databases-table__col-table-count"),
name: "tableCount",
},
- {
- title: (
-
- Range Count
-
- ),
- cell: database => (
-
- {database.spanStats?.range_count}
-
- ),
- sort: database => database.spanStats?.range_count,
- className: cx("databases-table__col-range-count"),
- name: "rangeCount",
- },
{
title: (
{
spanStats: {
approximate_disk_bytes: 100,
live_bytes: 200,
- range_count: 300,
total_bytes: 400,
error: undefined,
},
diff --git a/pkg/ui/workspaces/db-console/src/util/api.spec.ts b/pkg/ui/workspaces/db-console/src/util/api.spec.ts
index 1f9f2928915d..00029c6a3409 100644
--- a/pkg/ui/workspaces/db-console/src/util/api.spec.ts
+++ b/pkg/ui/workspaces/db-console/src/util/api.spec.ts
@@ -119,7 +119,6 @@ describe("rest api", function () {
approximate_disk_bytes: 100,
live_bytes: 200,
total_bytes: 300,
- range_count: 400,
},
],
},
@@ -130,7 +129,6 @@ describe("rest api", function () {
expect(res.results.spanStats.approximate_disk_bytes).toEqual(100);
expect(res.results.spanStats.live_bytes).toEqual(200);
expect(res.results.spanStats.total_bytes).toEqual(300);
- expect(res.results.spanStats.range_count).toEqual(400);
});
});
});
diff --git a/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/dashboards/overview.tsx b/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/dashboards/overview.tsx
index 3f5269e36faa..cf1c1795b8e8 100644
--- a/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/dashboards/overview.tsx
+++ b/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/dashboards/overview.tsx
@@ -30,12 +30,12 @@ export default function (props: GraphDashboardProps) {
return [
@@ -60,6 +60,11 @@ export default function (props: GraphDashboardProps) {
title="Deletes"
nonNegativeRate
/>
+
,
diff --git a/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/dashboards/sql.tsx b/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/dashboards/sql.tsx
index dcae25ab7407..424d21ba55a4 100644
--- a/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/dashboards/sql.tsx
+++ b/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/dashboards/sql.tsx
@@ -137,12 +137,12 @@ export default function (props: GraphDashboardProps) {
,
@@ -166,6 +166,11 @@ export default function (props: GraphDashboardProps) {
title="Deletes"
nonNegativeRate
/>
+
,
diff --git a/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/summaryBar.tsx b/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/summaryBar.tsx
index ae924b08b8f3..c627352b74e4 100644
--- a/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/summaryBar.tsx
+++ b/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/summaryBar.tsx
@@ -10,7 +10,7 @@ import { useSelector } from "react-redux";
import { Link } from "react-router-dom";
import { createSelector } from "reselect";
-import { Tooltip, Anchor } from "src/components";
+import { Anchor, Tooltip } from "src/components";
import { nodeStatusesSelector, nodeSumsSelector } from "src/redux/nodes";
import { howAreCapacityMetricsCalculated } from "src/util/docs";
import { EventBox } from "src/views/cluster/containers/events";
@@ -18,11 +18,11 @@ import { Metric } from "src/views/shared/components/metricQuery";
import {
SummaryBar,
SummaryLabel,
+ SummaryMetricsAggregator,
SummaryMetricStat,
SummaryStat,
SummaryStatBreakdown,
SummaryStatMessage,
- SummaryMetricsAggregator,
} from "src/views/shared/components/summaryBar";
/**
@@ -145,28 +145,7 @@ export default function (props: ClusterSummaryProps) {
>
-
-
-