diff --git a/docs/generated/http/full.md b/docs/generated/http/full.md index f951f53bebd0..4d9db87ccec8 100644 --- a/docs/generated/http/full.md +++ b/docs/generated/http/full.md @@ -1629,6 +1629,82 @@ Tier represents one level of the locality hierarchy. +## TenantServiceStatus + +`GET /_status/tenant_service_status` + +TenantServiceStatus returns the current service and data state of +the given tenant as known to the server orchestrator, which may +differ from the database state. + +Support status: [reserved](#support-status) + +#### Request Parameters + + + + + + + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| node_id | [string](#cockroach.server.serverpb.TenantServiceStatusRequest-string) | | | [reserved](#support-status) | +| tenant_id | [uint64](#cockroach.server.serverpb.TenantServiceStatusRequest-uint64) | | | [reserved](#support-status) | + + + + + + + +#### Response Parameters + + + + + + + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| status_by_node_id | [TenantServiceStatusResponse.StatusByNodeIdEntry](#cockroach.server.serverpb.TenantServiceStatusResponse-cockroach.server.serverpb.TenantServiceStatusResponse.StatusByNodeIdEntry) | repeated | | [reserved](#support-status) | +| errors_by_node_id | [TenantServiceStatusResponse.ErrorsByNodeIdEntry](#cockroach.server.serverpb.TenantServiceStatusResponse-cockroach.server.serverpb.TenantServiceStatusResponse.ErrorsByNodeIdEntry) | repeated | | [reserved](#support-status) | + + + + + + + +#### TenantServiceStatusResponse.StatusByNodeIdEntry + + + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| key | [int32](#cockroach.server.serverpb.TenantServiceStatusResponse-int32) | | | | +| value | [cockroach.multitenant.SQLInfo](#cockroach.server.serverpb.TenantServiceStatusResponse-cockroach.multitenant.SQLInfo) | 
| | | + + + + + + +#### TenantServiceStatusResponse.ErrorsByNodeIdEntry + + + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| key | [int32](#cockroach.server.serverpb.TenantServiceStatusResponse-int32) | | | | +| value | [string](#cockroach.server.serverpb.TenantServiceStatusResponse-string) | | | | + + + + + + ## TenantRanges `GET /_status/tenant_ranges` diff --git a/docs/tech-notes/observability/statistics_generation.md b/docs/tech-notes/observability/statistics_generation.md new file mode 100644 index 000000000000..ed66f789e7a8 --- /dev/null +++ b/docs/tech-notes/observability/statistics_generation.md @@ -0,0 +1,281 @@ +# Statistics Generation +Last Update: February 2024 + +Original author: maryliag + +This document details the statistics collected for both Statements and +Transactions, going over all columns on system.statement_statistics and +system.transaction_statistics. + +Table of Contents: + +- [Statement](#statement) + - [Key Values](#key-values) + - [Metadata](#metadata) + - [Statistics](#statistics) + - [Computed Columns](#computed-columns) + - [Extra](#extra) +- [Transaction](#transaction) + - [Key Values](#key-values-1) + - [Metadata](#metadata-1) + - [Statistics](#statistics-1) + - [Computed Columns](#computed-columns-1) + - [Extra](#extra-1) + + +## Statement +The columns stored per statement can be divided in: key values, metadata, +statistics e computed columns. + +### Key Values +These are the values used as keys on the table and their combination will be unique +for each new row. + +- `aggregated_ts`: the timestamp in which the statistics are being grouped. The +default period is 1h, meaning all executions on 1:xx are saved with +`aggregated_ts` of `1:00` and so on. 
Ex: `2024-02-07 15:00:00+00` +- `fingerprint_id`: the fingerprint ID of the statement, which is generated by +hashing the statement fingerprint (the query with the constants redacted), its +database and failure status, and if it was part of an implicit transaction. Ex: +`\x0a5cc4970527253a` +- `transaction_fingerprint_id`: the transaction fingerprint ID of the transaction +to which this statement was part of. Ex: `\xa53f79db832692e5` +- `plan_hash`: the hash of the plan gist (described below). Ex: `\x92fb94a0f3ac5a36` +- `app_name`: the application that ran the statement. Ex: `insights` +- `node_id`: the ID of the gateway node in which the statement was executed. +This value will be set to 0 if we disable the collection of gateway node with +the cluster setting `sql.metrics.statement_details.gateway_node.enabled`. +Disabling this value helps customers that have a lot of nodes and would otherwise +generate a lot of unique rows for the same statement fingerprint. Ex: `1` + +### Metadata +Metadata is a JSONB column with some information about the execution. The keys +of this JSONB are: + +- `db`: the database on which the statement was executed. Ex: `insights` +- `distsql`: if it was executed as a distributed SQL. Ex: `true` +- `failed`: if the statement failed to be executed. Ex: `false` +- `fullScan`: if the statement had a `FULL SCAN` as part of the plan. Ex: `true` +- `implicitTxn`: if the statement was part of an implicit transaction. Ex: `true` +- `query`: the statement itself with the parameters redacted. Ex: +`SELECT balance FROM insights_workload_table_1 ORDER BY balance DESC LIMIT $1` +- `querySummary`: the shorter version of the statement that is used on the +Console UI, to improve readability on tables with a lot of information, such +as the table on SQL Activity pages. Ex: `SELECT balance FROM insights_workload_table_1` +- `stmtType`: the type of the statement[^1]. Ex: `TypeDML` +- `vec`: if it was vectorized. 
Ex: `true` + +### Statistics +The statistics column is a JSONB that can be divided into 2 groups: executions and +statistics. The majority of the numeric values on this column have their value as +an object with `mean` and a `sqDiff` (square difference). + +The values from `execution_statistics` are only populated using sampling: + +- `cnt`: how many times this fingerprint was sampled on the aggregation period. +This is helpful to know how many executions these statistics are based on. Ex: `3` +- `contentionTime`: the time the statement was in contention with other statements. This time +does not include the time it takes to stream results back to the client. +Ex: `{"mean": 0.0035941523333333334, "sqDiff": 0.0000036056329785926688}` +- `cpuSQLNanos`: the SQL CPU time spent executing. It does not include SQL planning time nor +KV execution time. Ex: `{"mean": 193173, "sqDiff": 767794898}` +- `maxDiskUsage`: the maximum disk usage. Ex: `{"mean": 0, "sqDiff": 0}` +- `maxMemUsage`: the maximum memory usage. Ex: `{"mean": 2.2528E+5, "sqDiff": 0}` +- `networkBytes`: the amount of data transferred over the network (e.g., between regions and +nodes). If this value is 0, the statement was executed on a single node. Ex: `{"mean": 0, "sqDiff": 0}` +- `networkMsgs`: the amount of network messages over the network. Ex: `{"mean": 0, "sqDiff": 0}` +- `mvccIteratorStats`: statistics by the MVCC iterator. 
Ex: `{"blockBytes": {"mean": 0, "sqDiff": 0}, + "blockBytesInCache": {"mean": 0, "sqDiff": 0}, + "keyBytes": {"mean": 0, "sqDiff": 0}, + "pointCount": {"mean": 1006, "sqDiff": 0}, + "pointsCoveredByRangeTombstones": {"mean": 0, "sqDiff": 0}, + "rangeKeyContainedPoints": {"mean": 0, "sqDiff": 0}, + "rangeKeyCount": {"mean": 0, "sqDiff": 0}, + "rangeKeySkippedPoints": {"mean": 0, "sqDiff": 0}, + "seekCount": {"mean": 1E+1, "sqDiff": 0}, + "seekCountInternal": {"mean": 1E+1, "sqDiff": 0}, + "stepCount": {"mean": 1E+3, "sqDiff": 0}, + "stepCountInternal": {"mean": 1006, "sqDiff": 0}, + "valueBytes": {"mean": 149912, "sqDiff": 0}}` + + +The values from `statistics` are updated on every execution: + +- `bytesRead`: the aggregation of all bytes read from disk across all operators for this +statement. Ex: `{"mean": 173717.41176470587, "sqDiff": 38420.117647061816}` +- `cnt`: amount of executions of this statement. Notice this value will always be greater than or +equal to the `cnt` on `execution_statistics`. Ex: `17` +- `firstAttemptCnt`: amount of first attempts of this statement. The difference between this +value and the `cnt` will be the amount of retries. Ex: `17` +- `idleLat`: the time between statement executions in a transaction. Ex: `{"mean": 0, "sqDiff": 0}` +- `indexes`: the list of indexes used by this statement. The format will be `tableID@indexID`. +Ex: `["115@1"]` +- `lastErrorCode`: the last error code if the statement failed. Ex: `XXUUU` +- `lastExecAt`: the timestamp of the last execution within this aggregated timestamp of the +statement. Ex: `2024-02-07T15:37:58.201843Z` +- `latencyInfo`: information about latency. The values for `max` and `min` are based on all +executions of the statement within the aggregated timestamp, and the percentiles are sampled +and can include a period longer than the aggregated timestamp of the statement. 
+Ex: `{"max": 2.82403375, "min": 0.002160625, "p50": 0.007772875, "p90": 0.007772875, "p99": 0.007772875}` +- `maxRetries`: amount of retries. Ex: `0` +- `nodes`: a list of all nodes that were part of the statement execution. Note that even if we +disable the gateway node column, it will still show the value here. Ex: `[1]` +- `numRows`: number of rows affected by this statement. Ex: `{"mean": 365.70588235294116, "sqDiff": 1104041.5294117648}` +- `ovhLat`: overhead latency, includes transaction and retry management, error checking, etc. +Ex: `{"mean": 0.000003968117647044402, "sqDiff": 2.116522637650619E-10}` +- `parseLat`: the time to transform the SQL string into an abstract syntax tree (AST). +Ex: `{"mean": 0, "sqDiff": 0}` +- `planGists`: the plan gist used by the statement. Plan gist is a fingerprint for a query +plan, representing the logical query plan with items like constants removed and table names +replaced with table IDs. A plan gist can be decoded to a plan. Ex: `["AgHmAQIAAgAAABgGAg=="]` +- `planLat`: the time to transform the AST into a logical query plan. +Ex: `{"mean": 0.0005751862941176471, "sqDiff": 0.00003654328361436952}` +- `regions`: all regions in which the statement was executed. Ex: `["us-east1"]` +- `rowsRead`: the amount of rows read by the statement. Ex: `{"mean": 1E+3, "sqDiff": 0}` +- `rowsWritten`: the amount of rows written by the statement. Ex: `{"mean": 0, "sqDiff": 0}` +- `runLat`: the time to run the statement and fetch or compute the result rows. +Ex: `{"mean": 0.37770100005882346, "sqDiff": 9.522893593797042}` +- `svcLat`: the time to service the query, from start of parse to end of execute. +Ex: `{"mean": 0.37828015447058816, "sqDiff": 9.518556061599973}` + + +There is also a value for `index_recommendations` as an array of strings, that exists both as +part of the `statistics` column and as its own column to improve on `select`s by this value. 
+Ex: `{"creation : CREATE INDEX ON insights.public.insights_workload_table_1 (balance DESC);"}` + + + +### Computed columns +These are values that exist on the statistics column, but are computed again +here to be their own columns. This is helpful so indexes can be created +directly on these columns, which is helpful when running `select`s with filters +on these values. All the indexes filter out `app_name` that start with `$ internal`. +The definitions of those values (when not described) are the +same as the items with the same name listed above. + +- `indexes_usage`: this column has an `INVERTED INDEX`. Ex: `["115@1"]` +- `execution_count`. Ex: `17` +- `service_latency`. Ex: `0.37828015447058816` +- `cpu_sql_nanos`. Ex: `193173` +- `contention_time`. Ex: `0.0035941523333333334` +- `p99_latency`. Ex: `0.007772875` +- `total_estimated_execution_time`: the value of the count multiplied by the + mean service latency. Ex: `6.430762625999999` + +### Extra +There are a few other columns that don't qualify as any of the categories above. + +- `agg_interval`: the interval used to aggregate the executions. Ex: `01:00:00` +- `plan`: a sampled plan. As of 23.1+ this column is no longer populated to improve +performance. The value can be populated again by enabling the cluster setting +`sql.metrics.statement_details.plan_collection.enabled`. Ex: `{"Children": [], "Name": ""}` (with sampling disabled) + +## Transaction +The columns stored per transaction can be divided into: key values, metadata, +statistics, and computed columns. + +### Key Values +These are the values used as keys on the table and their combination will be unique +for each new row. + +- `aggregated_ts`: the timestamp in which the statistics are being grouped. The +default period is 1h, meaning all executions on 1:xx are saved with +`aggregated_ts` of `1:00` and so on. 
Ex: `2024-02-06 18:00:00+00` +- `fingerprint_id`: the fingerprint ID of the transaction, which is generated +by hashing the individual statement fingerprint IDs that comprised the +transaction. Ex: `\xab97426ea38ee52d` +- `app_name`: the application that ran the transaction. Ex: `$ cockroach demo` +- `node_id`: the ID of the gateway node in which the statement was executed. + This value will be set to 0 if we disable the collection of gateway node with + the cluster setting `sql.metrics.statement_details.gateway_node.enabled`. + Disabling this value helps customers that have a lot of nodes and would otherwise + generate a lot of unique rows for the same statement fingerprint. Ex: `1` + + +### Metadata +Metadata is a JSONB column with some information about the execution. + +- `stmtFingerprintIDs`: a list of all statement fingerprint IDs that were part of +this transaction. We limit the list size with the cluster setting +`sql.metrics.transaction_details.max_statement_ids` (default 1000). Ex: `["ab75c017ed028514"]` + + +### Statistics +The statistics column is a JSONB that can be divided into 2 groups: executions and +statistics. The majority of the numeric values on this column have their value as +an object with `mean` and a `sqDiff` (square difference). + +The values from `execution_statistics` are only populated using sampling: + +- `cnt`: how many times this fingerprint was sampled on the aggregation period. +This is helpful to know how many executions these statistics are based on. Ex: `1` +- `contentionTime`: the total time that all the statements of this transaction were in +contention with other statements. This time does not include the time it takes to stream +results back to the client. + Ex: `{"mean": 0.0035941523333333334, "sqDiff": 0.0000036056329785926688}` +- `cpuSQLNanos`: the SQL CPU time spent by all statements of this transaction executing. It +does not include SQL planning time nor KV execution time. 
Ex: `{"mean": 193173, "sqDiff": 767794898}` +- `maxDiskUsage`: the maximum disk usage. Ex: `{"mean": 0, "sqDiff": 0}` +- `maxMemUsage`: the maximum memory usage. Ex: `{"mean": 2.2528E+5, "sqDiff": 0}` +- `networkBytes`: the amount of data transferred over the network (e.g., between regions and +nodes). If this value is 0, the transaction was executed on a single node. Ex: `{"mean": 0, "sqDiff": 0}` +- `networkMsgs`: the amount of network messages over the network. Ex: `{"mean": 0, "sqDiff": 0}` +- `mvccIteratorStats`: statistics by the MVCC iterator. Ex: `{"blockBytes": {"mean": 0, "sqDiff": 0}, + "blockBytesInCache": {"mean": 0, "sqDiff": 0}, + "keyBytes": {"mean": 0, "sqDiff": 0}, + "pointCount": {"mean": 1006, "sqDiff": 0}, + "pointsCoveredByRangeTombstones": {"mean": 0, "sqDiff": 0}, + "rangeKeyContainedPoints": {"mean": 0, "sqDiff": 0}, + "rangeKeyCount": {"mean": 0, "sqDiff": 0}, + "rangeKeySkippedPoints": {"mean": 0, "sqDiff": 0}, + "seekCount": {"mean": 1E+1, "sqDiff": 0}, + "seekCountInternal": {"mean": 1E+1, "sqDiff": 0}, + "stepCount": {"mean": 1E+3, "sqDiff": 0}, + "stepCountInternal": {"mean": 1006, "sqDiff": 0}, + "valueBytes": {"mean": 149912, "sqDiff": 0}}` + + +The values from `statistics` are updated on every execution: + + +- `bytesRead`: the aggregation of all bytes read from disk across all operators for this +transaction. Ex: `{"mean": 173717.41176470587, "sqDiff": 38420.117647061816}` +- `cnt`: amount of executions of this transaction. Notice this value will always be greater or +equal the `cnt` on `execution_statistics`. Ex: `17` +- `commitLat`: the time to commit the transaction. Ex: `{"mean": 0.0000020357142857142854, "sqDiff": 1.8496728571428566E-12}` +- `idleLat`: the time between statement executions of the transaction. Ex: `{"mean": 0, "sqDiff": 0}` +- `maxRetries`: amount of retries. Ex: `0` +- `numRows`: number of rows affected by this transaction. 
Ex: `{"mean": 365.70588235294116, "sqDiff": 1104041.5294117648}` +- `retryLat`: the time spent retrying the transaction. Ex: `{"mean": 0, "sqDiff": 0}` +- `rowsRead`: the amount of rows read by the transaction. Ex: `{"mean": 1E+3, "sqDiff": 0}` +- `rowsWritten`: the amount of rows written by the transaction. Ex: `{"mean": 0, "sqDiff": 0}` +- `svcLat`: the time to service the transaction, from start of parse to end of execute. + Ex: `{"mean": 0.37828015447058816, "sqDiff": 9.518556061599973}` + +### Computed columns +These are values that exist on the statistics column, but are computed again +here to be their own columns. This is helpful so indexes can be created +directly on these columns, which is helpful when running `select`s with filters +on these values. All the indexes filter out `app_name` that start with `$ internal`. +The definition of those values (when not described) are the +same from the items with the same name listed above. + +- `execution_count`. Ex: `6` +- `service_latency`. Ex: `0.049141375` +- `cpu_sql_nanos`. Ex: `577084` +- `contention_time`. Ex: `0` +- `p99_latency`. Ex: `0` +- `total_estimated_execution_time`: the value os the count multiplied by the + mean service latency. Ex: `0.049141375` + +### Extra +There is another column that don't qualify as any of the categories above. + +- `agg_interval`: the interval used to aggregate the executions. Ex: `01:00:00` + + +[^1]: A statement type can be: + 1. DDL (Data Definition Language): deals with database schemas and descriptions. + 2. DML (Data Manipulation Language) deals with data manipulation and it is used to store, modify, retrieve, delete and update data in a database. + 3. DCL (Data Control Language) deals with commands such as GRANT and mostly concerned with rights, permissions and other controls of the database system. + 4. TCL (Transaction Control Language) deals with a transaction within a database. 
diff --git a/pkg/bench/rttanalysis/testdata/benchmark_expectations b/pkg/bench/rttanalysis/testdata/benchmark_expectations index 111a93d17542..2e76746a83c9 100644 --- a/pkg/bench/rttanalysis/testdata/benchmark_expectations +++ b/pkg/bench/rttanalysis/testdata/benchmark_expectations @@ -114,5 +114,5 @@ exp,benchmark 3,UDFResolution/select_from_udf 1,VirtualTableQueries/select_crdb_internal.invalid_objects_with_1_fk 1,VirtualTableQueries/select_crdb_internal.tables_with_1_fk -11,VirtualTableQueries/virtual_table_cache_with_point_lookups +5,VirtualTableQueries/virtual_table_cache_with_point_lookups 15,VirtualTableQueries/virtual_table_cache_with_schema_change diff --git a/pkg/ccl/backupccl/backup_tenant_test.go b/pkg/ccl/backupccl/backup_tenant_test.go index 54bab8eb219c..d355c1909f07 100644 --- a/pkg/ccl/backupccl/backup_tenant_test.go +++ b/pkg/ccl/backupccl/backup_tenant_test.go @@ -154,7 +154,10 @@ func TestBackupTenantImportingTable(t *testing.T) { } // Destroy the tenant, then restore it. tSrv.AppStopper().Stop(ctx) - if _, err := sqlDB.DB.ExecContext(ctx, "ALTER TENANT [10] STOP SERVICE; DROP TENANT [10] IMMEDIATE"); err != nil { + if _, err := sqlDB.DB.ExecContext(ctx, "ALTER TENANT [10] STOP SERVICE"); err != nil { + t.Fatal(err) + } + if _, err := sqlDB.DB.ExecContext(ctx, "DROP TENANT [10] IMMEDIATE"); err != nil { t.Fatal(err) } if _, err := sqlDB.DB.ExecContext(ctx, "RESTORE TENANT 10 FROM $1", dst); err != nil { diff --git a/pkg/ccl/backupccl/testdata/backup-restore/restore-tenants b/pkg/ccl/backupccl/testdata/backup-restore/restore-tenants index 5d933e454295..9f55d7e5d3f4 100644 --- a/pkg/ccl/backupccl/testdata/backup-restore/restore-tenants +++ b/pkg/ccl/backupccl/testdata/backup-restore/restore-tenants @@ -15,7 +15,11 @@ SELECT crdb_internal.create_tenant(6); # Drop one of them. 
exec-sql -ALTER TENANT [5] STOP SERVICE; DROP TENANT [5] +ALTER TENANT [5] STOP SERVICE; +---- + +exec-sql +DROP TENANT [5] ---- exec-sql @@ -139,7 +143,11 @@ SELECT id,name,data_state,service_mode,active,json_extract_path_text(crdb_intern # Check that another service mode is also preserved. exec-sql -ALTER TENANT newname STOP SERVICE; ALTER TENANT newname START SERVICE SHARED +ALTER TENANT newname STOP SERVICE; +---- + +exec-sql +ALTER TENANT newname START SERVICE SHARED; ---- query-sql diff --git a/pkg/ccl/logictestccl/testdata/logic_test/tenant_usage b/pkg/ccl/logictestccl/testdata/logic_test/tenant_usage index df423a2c6074..3eee8efe7a50 100644 --- a/pkg/ccl/logictestccl/testdata/logic_test/tenant_usage +++ b/pkg/ccl/logictestccl/testdata/logic_test/tenant_usage @@ -20,7 +20,9 @@ SELECT crdb_internal.update_tenant_resource_limits(5, 1000, 100, 0, now(), 0) # Note this marks the tenant as dropped. The GC will not delete the tenant # until after the ttl expires. statement ok -ALTER TENANT [5] STOP SERVICE; +ALTER TENANT [5] STOP SERVICE + +statement ok DROP TENANT [5] query error tenant "5" is not active diff --git a/pkg/ccl/serverccl/server_controller_test.go b/pkg/ccl/serverccl/server_controller_test.go index 5f452cf4c6c4..b5aa24386aa8 100644 --- a/pkg/ccl/serverccl/server_controller_test.go +++ b/pkg/ccl/serverccl/server_controller_test.go @@ -48,6 +48,8 @@ func TestSharedProcessTenantNodeLocalAccess(t *testing.T) { ctx := context.Background() nodeCount := 3 + skip.UnderDuress(t, "slow test") + dirs := make([]string, nodeCount) dirCleanups := make([]func(), nodeCount) for i := 0; i < nodeCount; i++ { @@ -85,8 +87,8 @@ func TestSharedProcessTenantNodeLocalAccess(t *testing.T) { db := sqlutils.MakeSQLRunner(tc.ServerConn(0)) db.Exec(t, `CREATE TENANT application; -ALTER TENANT application GRANT CAPABILITY can_use_nodelocal_storage; -ALTER TENANT application START SERVICE SHARED`) +ALTER TENANT application GRANT CAPABILITY can_use_nodelocal_storage`) + db.Exec(t, 
`ALTER TENANT application START SERVICE SHARED`) var tenantID uint64 db.QueryRow(t, "SELECT id FROM [SHOW TENANT application]").Scan(&tenantID) @@ -388,7 +390,7 @@ func TestServerControllerMultiNodeTenantStartup(t *testing.T) { defer log.Scope(t).Close(t) ctx := context.Background() - skip.UnderDeadlock(t, "slow under deadlock") + skip.UnderDuress(t, "slow test") t.Logf("starting test cluster") numNodes := 3 tc := serverutils.StartCluster(t, numNodes, base.TestClusterArgs{ @@ -405,7 +407,9 @@ func TestServerControllerMultiNodeTenantStartup(t *testing.T) { t.Logf("starting tenant servers") db := tc.ServerConn(0) - _, err := db.Exec("CREATE TENANT hello; ALTER TENANT hello START SERVICE SHARED") + _, err := db.Exec("CREATE TENANT hello") + require.NoError(t, err) + _, err = db.Exec("ALTER TENANT hello START SERVICE SHARED") require.NoError(t, err) // Pick a random node, try to run some SQL inside that tenant. diff --git a/pkg/ccl/serverccl/shared_process_tenant_test.go b/pkg/ccl/serverccl/shared_process_tenant_test.go index 323a77238f40..72a3eee372ed 100644 --- a/pkg/ccl/serverccl/shared_process_tenant_test.go +++ b/pkg/ccl/serverccl/shared_process_tenant_test.go @@ -34,7 +34,10 @@ func TestSharedProcessTenantNoSpanLimit(t *testing.T) { defer tc.Stopper().Stop(ctx) db := tc.ServerConn(0) - _, err := db.Exec("CREATE TENANT hello; ALTER TENANT hello START SERVICE SHARED") + _, err := db.Exec("CREATE TENANT hello") + require.NoError(t, err) + + _, err = db.Exec("ALTER TENANT hello START SERVICE SHARED") require.NoError(t, err) _, err = db.Exec("SET CLUSTER SETTING spanconfig.virtual_cluster.max_spans = 1000") diff --git a/pkg/cmd/roachtest/cluster.go b/pkg/cmd/roachtest/cluster.go index b13c21b3453f..0a9a092ebfb2 100644 --- a/pkg/cmd/roachtest/cluster.go +++ b/pkg/cmd/roachtest/cluster.go @@ -1428,7 +1428,7 @@ func (c *clusterImpl) HealthStatus( return nil, errors.WithDetail(err, "Unable to get admin UI address(es)") } getStatus := func(ctx context.Context, node int) 
*HealthStatusResult { - url := fmt.Sprintf(`http://%s/health?ready=1`, adminAddrs[node-1]) + url := fmt.Sprintf(`https://%s/health?ready=1`, adminAddrs[node-1]) resp, err := httputil.Get(ctx, url) if err != nil { return newHealthStatusResult(node, 0, nil, err) @@ -2469,19 +2469,10 @@ func (c *clusterImpl) loggerForCmd( // internal IPs and communication from a test driver to nodes in a cluster // should use external IPs. func (c *clusterImpl) pgURLErr( - ctx context.Context, - l *logger.Logger, - nodes option.NodeListOption, - external bool, - tenant string, - sqlInstance int, + ctx context.Context, l *logger.Logger, nodes option.NodeListOption, opts roachprod.PGURLOptions, ) ([]string, error) { - urls, err := roachprod.PgURL(ctx, l, c.MakeNodes(nodes), c.localCertsDir, roachprod.PGURLOptions{ - External: external, - Secure: c.localCertsDir != "", - VirtualClusterName: tenant, - SQLInstance: sqlInstance, - }) + opts.Secure = c.IsSecure() + urls, err := roachprod.PgURL(ctx, l, c.MakeNodes(nodes), c.localCertsDir, opts) if err != nil { return nil, err } @@ -2493,13 +2484,9 @@ func (c *clusterImpl) pgURLErr( // InternalPGUrl returns the internal Postgres endpoint for the specified nodes. func (c *clusterImpl) InternalPGUrl( - ctx context.Context, - l *logger.Logger, - nodes option.NodeListOption, - tenant string, - sqlInstance int, + ctx context.Context, l *logger.Logger, nodes option.NodeListOption, opts roachprod.PGURLOptions, ) ([]string, error) { - return c.pgURLErr(ctx, l, nodes, false, tenant, sqlInstance) + return c.pgURLErr(ctx, l, nodes, opts) } // Silence unused warning. @@ -2507,13 +2494,10 @@ var _ = (&clusterImpl{}).InternalPGUrl // ExternalPGUrl returns the external Postgres endpoint for the specified nodes. 
func (c *clusterImpl) ExternalPGUrl( - ctx context.Context, - l *logger.Logger, - nodes option.NodeListOption, - tenant string, - sqlInstance int, + ctx context.Context, l *logger.Logger, nodes option.NodeListOption, opts roachprod.PGURLOptions, ) ([]string, error) { - return c.pgURLErr(ctx, l, nodes, true, tenant, sqlInstance) + opts.External = true + return c.pgURLErr(ctx, l, nodes, opts) } func addrToAdminUIAddr(addr string) (string, error) { @@ -2642,7 +2626,7 @@ func (c *clusterImpl) addr( ctx context.Context, l *logger.Logger, nodes option.NodeListOption, external bool, ) ([]string, error) { var addrs []string - urls, err := c.pgURLErr(ctx, l, nodes, external, "" /* tenant */, 0 /* sqlInstance */) + urls, err := c.pgURLErr(ctx, l, nodes, roachprod.PGURLOptions{External: external}) if err != nil { return nil, err } @@ -2700,7 +2684,10 @@ func (c *clusterImpl) ConnE( for _, opt := range opts { opt(connOptions) } - urls, err := c.ExternalPGUrl(ctx, l, c.Node(node), connOptions.TenantName, connOptions.SQLInstance) + urls, err := c.ExternalPGUrl(ctx, l, c.Node(node), roachprod.PGURLOptions{ + VirtualClusterName: connOptions.TenantName, + SQLInstance: connOptions.SQLInstance, + }) if err != nil { return nil, err } diff --git a/pkg/cmd/roachtest/cluster/BUILD.bazel b/pkg/cmd/roachtest/cluster/BUILD.bazel index 585e12134601..dd796079f885 100644 --- a/pkg/cmd/roachtest/cluster/BUILD.bazel +++ b/pkg/cmd/roachtest/cluster/BUILD.bazel @@ -12,6 +12,7 @@ go_library( deps = [ "//pkg/cmd/roachtest/option", "//pkg/cmd/roachtest/spec", + "//pkg/roachprod", "//pkg/roachprod/install", "//pkg/roachprod/logger", "//pkg/roachprod/prometheus", diff --git a/pkg/cmd/roachtest/cluster/cluster_interface.go b/pkg/cmd/roachtest/cluster/cluster_interface.go index f8cc34aa85f4..530dde262d8f 100644 --- a/pkg/cmd/roachtest/cluster/cluster_interface.go +++ b/pkg/cmd/roachtest/cluster/cluster_interface.go @@ -17,6 +17,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" 
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec" + "github.com/cockroachdb/cockroach/pkg/roachprod" "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/cockroachdb/cockroach/pkg/roachprod/logger" "github.com/cockroachdb/cockroach/pkg/roachprod/prometheus" @@ -79,8 +80,8 @@ type Cluster interface { // SQL connection strings. - InternalPGUrl(ctx context.Context, l *logger.Logger, node option.NodeListOption, tenant string, sqlInstance int) ([]string, error) - ExternalPGUrl(ctx context.Context, l *logger.Logger, node option.NodeListOption, tenant string, sqlInstance int) ([]string, error) + InternalPGUrl(ctx context.Context, l *logger.Logger, node option.NodeListOption, opts roachprod.PGURLOptions) ([]string, error) + ExternalPGUrl(ctx context.Context, l *logger.Logger, node option.NodeListOption, opts roachprod.PGURLOptions) ([]string, error) // SQL clients to nodes. diff --git a/pkg/cmd/roachtest/roachtestutil/BUILD.bazel b/pkg/cmd/roachtest/roachtestutil/BUILD.bazel index 16f4181d23fc..863987a6113c 100644 --- a/pkg/cmd/roachtest/roachtestutil/BUILD.bazel +++ b/pkg/cmd/roachtest/roachtestutil/BUILD.bazel @@ -6,6 +6,7 @@ go_library( "commandbuilder.go", "disk_usage.go", "health_checker.go", + "httpclient.go", "jaeger.go", "utils.go", "validation_check.go", @@ -17,12 +18,16 @@ go_library( "//pkg/cmd/roachtest/option", "//pkg/cmd/roachtest/test", "//pkg/kv/kvpb", + "//pkg/roachprod", "//pkg/roachprod/config", "//pkg/roachprod/install", "//pkg/roachprod/logger", "//pkg/testutils/sqlutils", + "//pkg/util/httputil", "//pkg/util/humanizeutil", "//pkg/util/log", + "//pkg/util/protoutil", + "//pkg/util/syncutil", "//pkg/util/timeutil", "@com_github_cockroachdb_errors//:errors", ], diff --git a/pkg/cmd/roachtest/roachtestutil/httpclient.go b/pkg/cmd/roachtest/roachtestutil/httpclient.go new file mode 100644 index 000000000000..38a0bcca7fa2 --- /dev/null +++ b/pkg/cmd/roachtest/roachtestutil/httpclient.go @@ -0,0 +1,213 @@ +// Copyright 2024 The 
Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package roachtestutil + +import ( + "context" + "crypto/tls" + "fmt" + "net/http" + "net/http/cookiejar" + "net/url" + "strings" + "time" + + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" + "github.com/cockroachdb/cockroach/pkg/roachprod/logger" + "github.com/cockroachdb/cockroach/pkg/util/httputil" + "github.com/cockroachdb/cockroach/pkg/util/protoutil" + "github.com/cockroachdb/cockroach/pkg/util/syncutil" + "github.com/cockroachdb/errors" +) + +// RoachtestHTTPClient is a wrapper over httputil.client that: +// - Adds cookies for every request, allowing the client to +// authenticate with secure clusters. +// - Sets InsecureSkipVerify to true. Roachprod certs are not +// signed by a verified root authority. +// - Unsets DialContext and DisableKeepAlives set in httputil. +// These two settings are not needed for testing purposes. +// +// This hides the details of creating authenticated https requests +// from the roachtest user, as most of it is just boilerplate and +// the same across all tests. +// +// Note that this currently only supports requests to CRDB clusters. +type RoachtestHTTPClient struct { + client *httputil.Client + sessionID string + cluster cluster.Cluster + l *logger.Logger + // Used for safely adding to the cookie jar. 
+ mu syncutil.Mutex +} + +type RoachtestHTTPOptions struct { + Timeout time.Duration +} + +func HTTPTimeout(timeout time.Duration) func(options *RoachtestHTTPOptions) { + return func(options *RoachtestHTTPOptions) { + options.Timeout = timeout + } +} + +// DefaultHTTPClient returns a default RoachtestHTTPClient. +// This should be the default HTTP client used if you are +// trying to make a request to a secure cluster. +func DefaultHTTPClient( + c cluster.Cluster, l *logger.Logger, opts ...func(options *RoachtestHTTPOptions), +) *RoachtestHTTPClient { + roachtestHTTP := RoachtestHTTPClient{ + client: httputil.NewClientWithTimeout(httputil.StandardHTTPTimeout), + cluster: c, + l: l, + } + + // Certificates created by roachprod start are not signed by + // a verified root authority. + roachtestHTTP.client.Transport = &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + } + + httpOptions := &RoachtestHTTPOptions{} + for _, opt := range opts { + opt(httpOptions) + } + if httpOptions.Timeout != 0 { + roachtestHTTP.client.Timeout = httpOptions.Timeout + } + + return &roachtestHTTP +} + +func (r *RoachtestHTTPClient) Get(ctx context.Context, url string) (*http.Response, error) { + if err := r.addCookie(ctx, url); err != nil { + return nil, err + } + return r.client.Get(ctx, url) +} + +func (r *RoachtestHTTPClient) GetJSON( + ctx context.Context, path string, response protoutil.Message, +) error { + if err := r.addCookie(ctx, path); err != nil { + return err + } + return httputil.GetJSON(*r.client.Client, path, response) +} + +func (r *RoachtestHTTPClient) PostProtobuf( + ctx context.Context, path string, request, response protoutil.Message, +) error { + if err := r.addCookie(ctx, path); err != nil { + return err + } + return httputil.PostProtobuf(ctx, *r.client.Client, path, request, response) +} + +// ResetSession removes the saved sessionID so that it is retrieved +// again the next time a request is made. 
This is usually not required +// as the sessionID does not change unless the cluster is restarted. +func (r *RoachtestHTTPClient) ResetSession() { + r.sessionID = "" +} + +func (r *RoachtestHTTPClient) addCookie(ctx context.Context, cookieUrl string) error { + // If we haven't extracted the sessionID yet, do so. + if r.sessionID == "" { + id, err := getSessionID(ctx, r.cluster, r.l, r.cluster.All()) + if err != nil { + return errors.Wrapf(err, "roachtestutil.addCookie: unable to extract sessionID") + } + r.sessionID = id + } + + name, value, found := strings.Cut(r.sessionID, "=") + if !found { + return errors.New("Cookie not formatted correctly") + } + + url, err := url.Parse(cookieUrl) + if err != nil { + return errors.Wrapf(err, "roachtestutil.addCookie: unable to parse cookieUrl") + } + err = r.SetCookies(url, []*http.Cookie{{Name: name, Value: value}}) + if err != nil { + return errors.Wrapf(err, "roachtestutil.addCookie: unable to set cookie") + } + + return nil +} + +// SetCookies is a helper that checks if a client.CookieJar exists and creates +// one if it doesn't. It then sets the provided cookies through CookieJar.SetCookies. 
+func (r *RoachtestHTTPClient) SetCookies(u *url.URL, cookies []*http.Cookie) error { + r.mu.Lock() + defer r.mu.Unlock() + if r.client.Jar == nil { + jar, err := cookiejar.New(nil) + if err != nil { + return err + } + r.client.Jar = jar + } + r.client.Jar.SetCookies(u, cookies) + return nil +} + +func getSessionIDOnSingleNode( + ctx context.Context, c cluster.Cluster, l *logger.Logger, node option.NodeListOption, +) (string, error) { + loginCmd := fmt.Sprintf( + "%s auth-session login root --port={pgport%s} --certs-dir ./certs --format raw", + test.DefaultCockroachPath, node, + ) + res, err := c.RunWithDetailsSingleNode(ctx, l, option.WithNodes(node), loginCmd) + if err != nil { + return "", errors.Wrap(err, "failed to authenticate") + } + + var sessionCookie string + for _, line := range strings.Split(res.Stdout, "\n") { + if strings.HasPrefix(line, "session=") { + sessionCookie = line + } + } + if sessionCookie == "" { + return "", fmt.Errorf("failed to find session cookie in `login` output") + } + + return sessionCookie, nil +} + +func getSessionID( + ctx context.Context, c cluster.Cluster, l *logger.Logger, nodes option.NodeListOption, +) (string, error) { + var err error + var cookie string + // The session ID should be the same for all nodes so stop after we successfully + // get it from one node. 
+ for _, node := range nodes { + cookie, err = getSessionIDOnSingleNode(ctx, c, l, c.Node(node)) + if err == nil { + break + } + l.Printf("%s auth session login failed on node %d: %v", test.DefaultCockroachPath, node, err) + } + if err != nil { + return "", errors.Wrapf(err, "roachtestutil.GetSessionID") + } + sessionID := strings.Split(cookie, ";")[0] + return sessionID, nil +} diff --git a/pkg/cmd/roachtest/roachtestutil/mixedversion/mixedversion.go b/pkg/cmd/roachtest/roachtestutil/mixedversion/mixedversion.go index 2582344fd4fc..5a1de118600a 100644 --- a/pkg/cmd/roachtest/roachtestutil/mixedversion/mixedversion.go +++ b/pkg/cmd/roachtest/roachtestutil/mixedversion/mixedversion.go @@ -141,9 +141,7 @@ var ( // defaultClusterSettings is the set of cluster settings always // passed to `clusterupgrade.StartWithSettings` when (re)starting // nodes in a cluster. - defaultClusterSettings = []install.ClusterSettingOption{ - install.SecureOption(true), - } + defaultClusterSettings = []install.ClusterSettingOption{} // minSupportedARM64Version is the minimum version for which there // is a published ARM64 build. 
If we are running a mixedversion test diff --git a/pkg/cmd/roachtest/roachtestutil/utils.go b/pkg/cmd/roachtest/roachtestutil/utils.go index c1ae4494fe3f..0afd02728ada 100644 --- a/pkg/cmd/roachtest/roachtestutil/utils.go +++ b/pkg/cmd/roachtest/roachtestutil/utils.go @@ -15,6 +15,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" + "github.com/cockroachdb/cockroach/pkg/roachprod" "github.com/cockroachdb/cockroach/pkg/roachprod/config" "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/cockroachdb/cockroach/pkg/roachprod/logger" @@ -26,14 +27,17 @@ func SystemInterfaceSystemdUnitName() string { return install.VirtualClusterLabel(install.SystemInterfaceName, 0) } -// DefaultPGUrl is a wrapper over ExternalPGUrl that calls it with the arguments +// DefaultPGUrl is a wrapper over roachprod.PgUrl that calls it with the arguments // that *almost* all roachtests want: single tenant and only a single node. -// This wrapper will also make fixing #63145 in the future easier as we can -// add "password authenticated" to the above. 
func DefaultPGUrl( - ctx context.Context, c cluster.Cluster, l *logger.Logger, node option.NodeListOption, + ctx context.Context, + c cluster.Cluster, + l *logger.Logger, + node option.NodeListOption, + auth install.PGAuthMode, ) (string, error) { - pgurl, err := c.ExternalPGUrl(ctx, l, node, "", 0) + opts := roachprod.PGURLOptions{Auth: auth, Secure: c.IsSecure()} + pgurl, err := roachprod.PgURL(ctx, l, c.MakeNodes(node), "certs", opts) if err != nil { return "", err } diff --git a/pkg/cmd/roachtest/tests/activerecord.go b/pkg/cmd/roachtest/tests/activerecord.go index 5b62cb409378..c88b1ee84ae6 100644 --- a/pkg/cmd/roachtest/tests/activerecord.go +++ b/pkg/cmd/roachtest/tests/activerecord.go @@ -51,7 +51,8 @@ func registerActiveRecord(r registry.Registry) { t.Status("setting up cockroach") startOpts := option.DefaultStartOptsInMemory() startOpts.RoachprodOpts.SQLPort = config.DefaultSQLPort - c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings(), c.All()) + // Activerecord uses root user with ssl disabled. 
+ c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings(install.SecureOption(false)), c.All()) version, err := fetchCockroachVersion(ctx, t.L(), c, node[0]) if err != nil { diff --git a/pkg/cmd/roachtest/tests/admission_control_elastic_io.go b/pkg/cmd/roachtest/tests/admission_control_elastic_io.go index 6ae40aa1fc78..cdf0a98fb9ae 100644 --- a/pkg/cmd/roachtest/tests/admission_control_elastic_io.go +++ b/pkg/cmd/roachtest/tests/admission_control_elastic_io.go @@ -65,10 +65,6 @@ func registerElasticIO(r registry.Registry) { WithGrafanaDashboardJSON(grafana.ChangefeedAdmissionControlGrafana) err := c.StartGrafana(ctx, t.L(), promCfg) require.NoError(t, err) - promClient, err := clusterstats.SetupCollectorPromClient(ctx, c, t.L(), promCfg) - require.NoError(t, err) - statCollector := clusterstats.NewStatsCollector(ctx, promClient) - c.Put(ctx, t.DeprecatedWorkload(), "./workload", c.Node(workAndPromNode)) startOpts := option.DefaultStartOptsNoBackups() roachtestutil.SetDefaultAdminUIPort(c, &startOpts.RoachprodOpts) @@ -76,6 +72,9 @@ func registerElasticIO(r registry.Registry) { "--vmodule=io_load_listener=2") settings := install.MakeClusterSettings() c.Start(ctx, t.L(), startOpts, settings, c.Range(1, crdbNodes)) + promClient, err := clusterstats.SetupCollectorPromClient(ctx, c, t.L(), promCfg) + require.NoError(t, err) + statCollector := clusterstats.NewStatsCollector(ctx, promClient) setAdmissionControl(ctx, t, c, true) duration := 30 * time.Minute t.Status("running workload") @@ -85,7 +84,7 @@ func registerElasticIO(r registry.Registry) { url := fmt.Sprintf(" {pgurl:1-%d}", crdbNodes) cmd := "./workload run kv --init --histograms=perf/stats.json --concurrency=512 " + "--splits=1000 --read-percent=0 --min-block-bytes=65536 --max-block-bytes=65536 " + - "--txn-qos=background --tolerate-errors" + dur + url + "--txn-qos=background --tolerate-errors --secure" + dur + url c.Run(ctx, option.WithNodes(c.Node(workAndPromNode)), cmd) return nil }) diff --git 
a/pkg/cmd/roachtest/tests/admission_control_intent_resolution.go b/pkg/cmd/roachtest/tests/admission_control_intent_resolution.go index de66ebba16f9..a74aaa3f0a96 100644 --- a/pkg/cmd/roachtest/tests/admission_control_intent_resolution.go +++ b/pkg/cmd/roachtest/tests/admission_control_intent_resolution.go @@ -64,17 +64,18 @@ func registerIntentResolutionOverload(r registry.Registry) { WithGrafanaDashboardJSON(grafana.ChangefeedAdmissionControlGrafana) err := c.StartGrafana(ctx, t.L(), promCfg) require.NoError(t, err) - promClient, err := clusterstats.SetupCollectorPromClient(ctx, c, t.L(), promCfg) - require.NoError(t, err) - statCollector := clusterstats.NewStatsCollector(ctx, promClient) startOpts := option.DefaultStartOptsNoBackups() startOpts.RoachprodOpts.ExtraArgs = append(startOpts.RoachprodOpts.ExtraArgs, "--vmodule=io_load_listener=2") - roachtestutil.SetDefaultSQLPort(c, &startOpts.RoachprodOpts) roachtestutil.SetDefaultAdminUIPort(c, &startOpts.RoachprodOpts) settings := install.MakeClusterSettings() c.Start(ctx, t.L(), startOpts, settings, c.Range(1, crdbNodes)) + + promClient, err := clusterstats.SetupCollectorPromClient(ctx, c, t.L(), promCfg) + require.NoError(t, err) + statCollector := clusterstats.NewStatsCollector(ctx, promClient) + setAdmissionControl(ctx, t, c, true) t.Status("running txn") m := c.NewMonitor(ctx, c.Range(1, crdbNodes)) diff --git a/pkg/cmd/roachtest/tests/admission_control_multitenant_fairness.go b/pkg/cmd/roachtest/tests/admission_control_multitenant_fairness.go index da938cbe0979..5a0f01235c9e 100644 --- a/pkg/cmd/roachtest/tests/admission_control_multitenant_fairness.go +++ b/pkg/cmd/roachtest/tests/admission_control_multitenant_fairness.go @@ -145,7 +145,7 @@ func runMultiTenantFairness( t.L().Printf("starting cockroach securely (<%s)", time.Minute) c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), - install.MakeClusterSettings(install.SecureOption(true)), + install.MakeClusterSettings(), crdbNode, ) diff --git 
a/pkg/cmd/roachtest/tests/admission_control_tpcc_overload.go b/pkg/cmd/roachtest/tests/admission_control_tpcc_overload.go index 071c67c96e82..41f106ef369c 100644 --- a/pkg/cmd/roachtest/tests/admission_control_tpcc_overload.go +++ b/pkg/cmd/roachtest/tests/admission_control_tpcc_overload.go @@ -124,7 +124,7 @@ func verifyNodeLiveness( if err := retry.WithMaxAttempts(ctx, retry.Options{ MaxBackoff: 500 * time.Millisecond, }, 60, func() (err error) { - response, err = getMetrics(ctx, adminURLs[0], now.Add(-runDuration), now, []tsQuery{ + response, err = getMetrics(ctx, c, t, adminURLs[0], now.Add(-runDuration), now, []tsQuery{ { name: "cr.node.liveness.heartbeatfailures", queryType: total, diff --git a/pkg/cmd/roachtest/tests/allocator.go b/pkg/cmd/roachtest/tests/allocator.go index 356e4aad3e3e..ab7965252c74 100644 --- a/pkg/cmd/roachtest/tests/allocator.go +++ b/pkg/cmd/roachtest/tests/allocator.go @@ -21,7 +21,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/clusterstats" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" - "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/roachprod/install" @@ -48,17 +47,12 @@ func registerAllocator(r registry.Registry) { db := c.Conn(ctx, t.L(), 1) defer db.Close() - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Nodes(1)) - if err != nil { - t.Fatal(err) - } - m := c.NewMonitor(ctx, c.Range(1, start)) m.Go(func(ctx context.Context) error { t.Status("loading fixture") if err := c.RunE( ctx, option.WithNodes(c.Node(1)), - "./cockroach", "workload", "fixtures", "import", "tpch", "--scale-factor", "10", pgurl, + "./cockroach", "workload", "fixtures", "import", "tpch", "--scale-factor", "10", "{pgurl:1}", ); err != nil { t.Fatal(err) } @@ -73,7 +67,7 @@ func 
registerAllocator(r registry.Registry) { WithCluster(clusNodes.InstallNodes()). WithPrometheusNode(promNode.InstallNodes()[0]) - err = c.StartGrafana(ctx, t.L(), cfg) + err := c.StartGrafana(ctx, t.L(), cfg) require.NoError(t, err) cleanupFunc := func() { @@ -91,7 +85,7 @@ func registerAllocator(r registry.Registry) { // Start the remaining nodes to kick off upreplication/rebalancing. c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings(), c.Range(start+1, nodes)) - c.Run(ctx, option.WithNodes(c.Node(1)), fmt.Sprintf("./cockroach workload init kv --drop '%s'", pgurl)) + c.Run(ctx, option.WithNodes(c.Node(1)), "./cockroach workload init kv --drop {pgurl:1}") for node := 1; node <= nodes; node++ { node := node // TODO(dan): Ideally, the test would fail if this queryload failed, @@ -457,13 +451,9 @@ FROM crdb_internal.kv_store_status t.Fatalf("expected 0 mis-replicated ranges, but found %d", n) } - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Nodes(1)) - if err != nil { - t.Fatal(err) - } decom := func(id int) { c.Run(ctx, option.WithNodes(c.Node(1)), - fmt.Sprintf("./cockroach node decommission --insecure --url=%s --wait=none %d", pgurl, id)) + fmt.Sprintf("./cockroach node decommission --certs-dir=certs --port={pgport%s} --wait=none %d", c.Node(id), id)) } // Decommission a node. The ranges should down-replicate to 7 replicas. 
diff --git a/pkg/cmd/roachtest/tests/alterpk.go b/pkg/cmd/roachtest/tests/alterpk.go index 942b067556c5..f0ab215feda1 100644 --- a/pkg/cmd/roachtest/tests/alterpk.go +++ b/pkg/cmd/roachtest/tests/alterpk.go @@ -18,7 +18,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" - "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/roachprod/install" @@ -105,14 +104,9 @@ func registerAlterPK(r registry.Registry) { const duration = 10 * time.Minute roachNodes, loadNode := setupTest(ctx, t, c) - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Node(1)) - if err != nil { - t.Fatal(err) - } cmd := fmt.Sprintf( - "./cockroach workload fixtures import tpcc --warehouses=%d --db=tpcc '%s'", + "./cockroach workload fixtures import tpcc --warehouses=%d --db=tpcc {pgurl:1}", warehouses, - pgurl, ) if err := c.RunE(ctx, option.WithNodes(c.Node(roachNodes[0])), cmd); err != nil { t.Fatal(err) diff --git a/pkg/cmd/roachtest/tests/asyncpg.go b/pkg/cmd/roachtest/tests/asyncpg.go index f0edb349509f..a0f9016ce546 100644 --- a/pkg/cmd/roachtest/tests/asyncpg.go +++ b/pkg/cmd/roachtest/tests/asyncpg.go @@ -12,6 +12,7 @@ package tests import ( "context" + "fmt" "regexp" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" @@ -22,11 +23,11 @@ import ( "github.com/cockroachdb/cockroach/pkg/roachprod/install" ) -const asyncpgRunTestCmd = ` -source venv/bin/activate && +var asyncpgRunTestCmd = fmt.Sprintf(` +source venv/bin/activate && cd /mnt/data1/asyncpg && -PGPORT={pgport:1} PGHOST=localhost PGUSER=test_admin PGDATABASE=defaultdb python3 setup.py test > asyncpg.stdout -` +PGPORT={pgport:1} PGHOST=localhost PGUSER=%s PGPASSWORD=%s PGSSLROOTCERT=$HOME/certs/ca.crt 
PGSSLMODE=require PGDATABASE=defaultdb python3 setup.py test > asyncpg.stdout +`, install.DefaultUser, install.DefaultPassword) var asyncpgReleaseTagRegex = regexp.MustCompile(`^(?Pv\d+)\.(?P\d+)\.(?P\d+)$`) diff --git a/pkg/cmd/roachtest/tests/asyncpg_blocklist.go b/pkg/cmd/roachtest/tests/asyncpg_blocklist.go index ea9c63423330..c22cc398faa5 100644 --- a/pkg/cmd/roachtest/tests/asyncpg_blocklist.go +++ b/pkg/cmd/roachtest/tests/asyncpg_blocklist.go @@ -38,6 +38,9 @@ var asyncpgBlocklist = blocklist{ `test_codecs.TestCodecs.test_unknown_type_text_fallback`: "unsupported feature - https://github.com/cockroachdb/cockroach/issues/54516", `test_codecs.TestCodecs.test_void`: "unknown", `test_connect.TestSettings.test_get_settings_01`: "unknown", + `test_copy.TestCopyFrom.test_copy_from_query_cancellation_explicit`: "known asyncpg ssl issue - https://github.com/MagicStack/asyncpg/issues/240", + `test_copy.TestCopyFrom.test_copy_from_query_timeout_1`: "known asyncpg ssl issue - https://github.com/MagicStack/asyncpg/issues/240", + `test_copy.TestCopyFrom.test_copy_from_query_to_sink`: "known asyncpg ssl issue - https://github.com/MagicStack/asyncpg/issues/240", `test_copy.TestCopyFrom.test_copy_from_table_basics`: "no support for COPY TO - https://github.com/cockroachdb/cockroach/issues/85571", `test_copy.TestCopyTo.test_copy_to_table_basics`: "unknown", `test_cursor.TestCursor.test_cursor_02`: "unknown", @@ -60,7 +63,6 @@ var asyncpgBlocklist = blocklist{ `test_introspection.TestIntrospection.test_introspection_on_large_db`: "unsupported feature - https://github.com/cockroachdb/cockroach/issues/22456", `test_introspection.TestIntrospection.test_introspection_retries_after_cache_bust`: "unsupported feature - https://github.com/cockroachdb/cockroach/issues/27796", `test_introspection.TestIntrospection.test_introspection_sticks_for_ps`: "unknown type: pg_catalog.json", - `test_listeners.TestListeners.test_dangling_listener_warns`: "LISTEN - 
https://github.com/cockroachdb/cockroach/issues/41522", `test_listeners.TestListeners.test_listen_01`: "LISTEN - https://github.com/cockroachdb/cockroach/issues/41522", `test_listeners.TestListeners.test_listen_02`: "LISTEN - https://github.com/cockroachdb/cockroach/issues/41522", `test_listeners.TestListeners.test_listen_notletters`: "LISTEN - https://github.com/cockroachdb/cockroach/issues/41522", @@ -68,7 +70,6 @@ var asyncpgBlocklist = blocklist{ `test_listeners.TestLogListeners.test_log_listener_02`: "unsupported feature - https://github.com/cockroachdb/cockroach/issues/17511", `test_listeners.TestLogListeners.test_log_listener_03`: "unsupported feature - https://github.com/cockroachdb/cockroach/issues/17511", `test_pool.TestPool.test_pool_remote_close`: "unsupported pg_terminate_backend() function", - `test_prepare.TestPrepare.test_prepare_08_big_result`: "unknown", `test_prepare.TestPrepare.test_prepare_09_raise_error`: "unsupported feature - https://github.com/cockroachdb/cockroach/issues/17511", `test_prepare.TestPrepare.test_prepare_14_explain`: "unknown", `test_prepare.TestPrepare.test_prepare_16_command_result`: "unknown", diff --git a/pkg/cmd/roachtest/tests/autoupgrade.go b/pkg/cmd/roachtest/tests/autoupgrade.go index bf4bb792d23e..35ff010efb53 100644 --- a/pkg/cmd/roachtest/tests/autoupgrade.go +++ b/pkg/cmd/roachtest/tests/autoupgrade.go @@ -18,7 +18,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" - "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/cockroachdb/cockroach/pkg/testutils" @@ -74,12 +73,8 @@ func registerAutoUpgrade(r registry.Registry) { decommissionAndStop := func(node int) error { t.WorkerStatus("decommission") - pgurl, err := 
roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Node(node)) - if err != nil { - return err - } if err := c.RunE(ctx, option.WithNodes(c.Node(node)), - fmt.Sprintf("./cockroach node decommission %d --insecure --url=%s", node, pgurl)); err != nil { + fmt.Sprintf("./cockroach node decommission %d --certs-dir=certs --port={pgport%s}", node, c.Node(node))); err != nil { return err } t.WorkerStatus("stop") diff --git a/pkg/cmd/roachtest/tests/awsdms.go b/pkg/cmd/roachtest/tests/awsdms.go index 1cda78eb1b5f..a81dcd5f0e5e 100644 --- a/pkg/cmd/roachtest/tests/awsdms.go +++ b/pkg/cmd/roachtest/tests/awsdms.go @@ -766,11 +766,9 @@ func setupDMSEndpointsAndTask( PostgreSQLSettings: &dmstypes.PostgreSQLSettings{ DatabaseName: proto.String(awsdmsCRDBDatabase), Username: proto.String(awsdmsCRDBUser), - // Password is a required field, but CockroachDB doesn't take passwords in - // --insecure mode. As such, put in some garbage. - Password: proto.String("garbage"), - Port: proto.Int32(26257), - ServerName: proto.String(externalCRDBAddr[0]), + Password: proto.String(awsdmsPassword), + Port: proto.Int32(26257), + ServerName: proto.String(externalCRDBAddr[0]), }, }, endpoint: dmsEndpoints.defaultTarget, diff --git a/pkg/cmd/roachtest/tests/backup.go b/pkg/cmd/roachtest/tests/backup.go index 7cc9953b9950..a4a779eb5973 100644 --- a/pkg/cmd/roachtest/tests/backup.go +++ b/pkg/cmd/roachtest/tests/backup.go @@ -335,15 +335,11 @@ func registerBackup(r registry.Registry) { m := c.NewMonitor(ctx) m.Go(func(ctx context.Context) error { t.Status(`running backup`) - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Node(1)) - if err != nil { - return err - } // Tick once before starting the backup, and once after to capture the // total elapsed time. This is used by roachperf to compute and display // the average MB/sec per node. 
tick() - c.Run(ctx, option.WithNodes(c.Node(1)), `./cockroach sql --insecure --url=`+pgurl+` -e " + c.Run(ctx, option.WithNodes(c.Node(1)), `./cockroach sql --url={pgurl:1} -e " BACKUP bank.bank TO 'gs://`+backupTestingBucket+`/`+dest+`?AUTH=implicit'"`) tick() diff --git a/pkg/cmd/roachtest/tests/backup_restore_roundtrip.go b/pkg/cmd/roachtest/tests/backup_restore_roundtrip.go index 42c1d125e698..d51fe26e2b17 100644 --- a/pkg/cmd/roachtest/tests/backup_restore_roundtrip.go +++ b/pkg/cmd/roachtest/tests/backup_restore_roundtrip.go @@ -110,7 +110,7 @@ func backupRestoreRoundTrip( "COCKROACH_MIN_RANGE_MAX_BYTES=1", }) - c.Start(ctx, t.L(), maybeUseMemoryBudget(t, 50), install.MakeClusterSettings(install.SecureOption(true), envOption), roachNodes) + c.Start(ctx, t.L(), maybeUseMemoryBudget(t, 50), install.MakeClusterSettings(envOption), roachNodes) m := c.NewMonitor(ctx, roachNodes) m.Go(func(ctx context.Context) error { diff --git a/pkg/cmd/roachtest/tests/build_info.go b/pkg/cmd/roachtest/tests/build_info.go index 232d3c7ae9a0..7799cbc5655c 100644 --- a/pkg/cmd/roachtest/tests/build_info.go +++ b/pkg/cmd/roachtest/tests/build_info.go @@ -12,15 +12,14 @@ package tests import ( "context" - "net/http" "strings" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/cockroachdb/cockroach/pkg/server/serverpb" - "github.com/cockroachdb/cockroach/pkg/util/httputil" ) // RunBuildInfo is a test that sanity checks the build info. 
@@ -32,8 +31,9 @@ func RunBuildInfo(ctx context.Context, t test.Test, c cluster.Cluster) { if err != nil { t.Fatal(err) } - url := `http://` + adminUIAddrs[0] + `/_status/details/local` - err = httputil.GetJSON(http.Client{}, url, &details) + url := `https://` + adminUIAddrs[0] + `/_status/details/local` + client := roachtestutil.DefaultHTTPClient(c, t.L()) + err = client.GetJSON(ctx, url, &details) if err != nil { t.Fatal(err) } diff --git a/pkg/cmd/roachtest/tests/clearrange.go b/pkg/cmd/roachtest/tests/clearrange.go index 31625bf6421d..76a2b8e4f9ad 100644 --- a/pkg/cmd/roachtest/tests/clearrange.go +++ b/pkg/cmd/roachtest/tests/clearrange.go @@ -18,7 +18,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" - "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/roachprod/install" @@ -66,16 +65,12 @@ func registerClearRange(r registry.Registry) { func runClearRange(ctx context.Context, t test.Test, c cluster.Cluster, aggressiveChecks bool) { t.Status("restoring fixture") c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings()) - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Nodes(1)) - if err != nil { - t.Fatal(err) - } m := c.NewMonitor(ctx) m.Go(func(ctx context.Context) error { // NB: on a 10 node cluster, this should take well below 3h. 
tBegin := timeutil.Now() c.Run(ctx, option.WithNodes(c.Node(1)), "./cockroach", "workload", "fixtures", "import", "bank", - "--payload-bytes=10240", "--ranges=10", "--rows=65104166", "--seed=4", "--db=bigbank", pgurl) + "--payload-bytes=10240", "--ranges=10", "--rows=65104166", "--seed=4", "--db=bigbank", "{pgurl:1}") t.L().Printf("import took %.2fs", timeutil.Since(tBegin).Seconds()) return nil }) @@ -104,10 +99,9 @@ func runClearRange(ctx context.Context, t test.Test, c cluster.Cluster, aggressi // Use a 120s connect timeout to work around the fact that the server will // declare itself ready before it's actually 100% ready. See: // https://github.com/cockroachdb/cockroach/issues/34897#issuecomment-465089057 - c.Run(ctx, option.WithNodes(c.Node(1)), fmt.Sprintf( - `COCKROACH_CONNECT_TIMEOUT=120 ./cockroach sql --url=%s --insecure -e "DROP DATABASE IF EXISTS tinybank"`, pgurl)) + c.Run(ctx, option.WithNodes(c.Node(1)), `COCKROACH_CONNECT_TIMEOUT=120 ./cockroach sql --url={pgurl:1} -e "DROP DATABASE IF EXISTS tinybank"`) c.Run(ctx, option.WithNodes(c.Node(1)), "./cockroach", "workload", "fixtures", "import", "bank", "--db=tinybank", - "--payload-bytes=100", "--ranges=10", "--rows=800", "--seed=1", pgurl) + "--payload-bytes=100", "--ranges=10", "--rows=800", "--seed=1", "{pgurl:1}") t.Status() @@ -139,7 +133,7 @@ ORDER BY raw_start_key ASC LIMIT 1`, }() m.Go(func(ctx context.Context) error { - c.Run(ctx, option.WithNodes(c.Node(1)), `./cockroach workload init kv`, pgurl) + c.Run(ctx, option.WithNodes(c.Node(1)), `./cockroach workload init kv {pgurl:1}`) c.Run(ctx, option.WithNodes(c.All()), fmt.Sprintf(`./cockroach workload run kv --concurrency=32 --duration=1h --tolerate-errors {pgurl%s}`, c.All())) return nil }) diff --git a/pkg/cmd/roachtest/tests/cli.go b/pkg/cmd/roachtest/tests/cli.go index d776ba1daee6..0d0025030047 100644 --- a/pkg/cmd/roachtest/tests/cli.go +++ b/pkg/cmd/roachtest/tests/cli.go @@ -47,7 +47,7 @@ func runCLINodeStatus(ctx context.Context, t 
test.Test, c cluster.Cluster) { } nodeStatus := func() (_ string, _ []string, err error) { - result, err := c.RunWithDetailsSingleNode(ctx, t.L(), option.WithNodes(c.Node(1)), "./cockroach node status --insecure -p {pgport:1}") + result, err := c.RunWithDetailsSingleNode(ctx, t.L(), option.WithNodes(c.Node(1)), "./cockroach node status --certs-dir=certs -p {pgport:1}") if err != nil { return "", nil, err } diff --git a/pkg/cmd/roachtest/tests/cluster_init.go b/pkg/cmd/roachtest/tests/cluster_init.go index 6a79af0b23f2..bc2ad1f3cb87 100644 --- a/pkg/cmd/roachtest/tests/cluster_init.go +++ b/pkg/cmd/roachtest/tests/cluster_init.go @@ -13,7 +13,6 @@ package tests import ( "context" gosql "database/sql" - "fmt" "io" "net/http" "strings" @@ -48,10 +47,14 @@ func runClusterInit(ctx context.Context, t test.Test, c cluster.Cluster) { // via the join targets. startOpts.RoachprodOpts.JoinTargets = c.All() + // Start the cluster in insecure mode to allow it to test both + // authenticated and unauthenticated code paths. + settings := install.MakeClusterSettings(install.SecureOption(false)) + for _, initNode := range []int{2, 1} { c.Wipe(ctx, false /* preserveCerts */) t.L().Printf("starting test with init node %d", initNode) - c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings()) + c.Start(ctx, t.L(), startOpts, settings) urlMap := make(map[int]string) adminUIAddrs, err := c.ExternalAdminUIAddr(ctx, t.L(), c.All()) @@ -150,8 +153,7 @@ func runClusterInit(ctx context.Context, t test.Test, c cluster.Cluster) { } t.L().Printf("sending init command to node %d", initNode) - c.Run(ctx, option.WithNodes(c.Node(initNode)), - fmt.Sprintf(`./cockroach init --insecure --port={pgport:%d}`, initNode)) + c.Run(ctx, option.WithNodes(c.Node(initNode)), `./cockroach init --url={pgurl:1}`) // This will only succeed if 3 nodes joined the cluster. 
err = WaitFor3XReplication(ctx, t, t.L(), dbs[0]) @@ -160,8 +162,7 @@ func runClusterInit(ctx context.Context, t test.Test, c cluster.Cluster) { execCLI := func(runNode int, extraArgs ...string) (string, error) { args := []string{"./cockroach"} args = append(args, extraArgs...) - args = append(args, "--insecure") - args = append(args, fmt.Sprintf("--port={pgport:%d}", runNode)) + args = append(args, "--url={pgurl:1}") result, err := c.RunWithDetailsSingleNode(ctx, t.L(), option.WithNodes(c.Node(runNode)), args...) combinedOutput := result.Stdout + result.Stderr t.L().Printf("%s\n", combinedOutput) diff --git a/pkg/cmd/roachtest/tests/cluster_to_cluster.go b/pkg/cmd/roachtest/tests/cluster_to_cluster.go index 10992a1819f5..a4b9d0ff194b 100644 --- a/pkg/cmd/roachtest/tests/cluster_to_cluster.go +++ b/pkg/cmd/roachtest/tests/cluster_to_cluster.go @@ -37,6 +37,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/multitenant/mtinfopb" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/roachprod" "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/cockroachdb/cockroach/pkg/roachprod/logger" "github.com/cockroachdb/cockroach/pkg/roachprod/prometheus" @@ -501,20 +502,20 @@ func (rd *replicationDriver) setupC2C( srcStartOps.RoachprodOpts.InitTarget = 1 roachtestutil.SetDefaultAdminUIPort(c, &srcStartOps.RoachprodOpts) - srcClusterSetting := install.MakeClusterSettings(install.SecureOption(true)) + srcClusterSetting := install.MakeClusterSettings() c.Start(ctx, t.L(), srcStartOps, srcClusterSetting, srcCluster) // TODO(msbutler): allow for backups once this test stabilizes a bit more. 
dstStartOps := option.DefaultStartOptsNoBackups() dstStartOps.RoachprodOpts.InitTarget = rd.rs.srcNodes + 1 roachtestutil.SetDefaultAdminUIPort(c, &dstStartOps.RoachprodOpts) - dstClusterSetting := install.MakeClusterSettings(install.SecureOption(true)) + dstClusterSetting := install.MakeClusterSettings() c.Start(ctx, t.L(), dstStartOps, dstClusterSetting, dstCluster) srcNode := srcCluster.SeededRandNode(rd.rng) destNode := dstCluster.SeededRandNode(rd.rng) - addr, err := c.ExternalPGUrl(ctx, t.L(), srcNode, "" /* tenant */, 0 /* sqlInstance */) + addr, err := c.ExternalPGUrl(ctx, t.L(), srcNode, roachprod.PGURLOptions{}) t.L().Printf("Randomly chosen src node %d for gateway with address %s", srcNode, addr) t.L().Printf("Randomly chosen dst node %d for gateway", destNode) @@ -1609,7 +1610,7 @@ func registerClusterReplicationResilience(r registry.Registry) { shutdownNode: rrd.shutdownNode, watcherNode: destinationWatcherNode, crdbNodes: rrd.crdbNodes(), - restartSettings: []install.ClusterSettingOption{install.SecureOption(true)}, + restartSettings: []install.ClusterSettingOption{}, rng: rrd.rng, } if err := executeNodeShutdown(ctx, t, c, shutdownCfg, shutdownStarter()); err != nil { diff --git a/pkg/cmd/roachtest/tests/connection_latency.go b/pkg/cmd/roachtest/tests/connection_latency.go index 3869c0a81fa7..ef3e664cedc8 100644 --- a/pkg/cmd/roachtest/tests/connection_latency.go +++ b/pkg/cmd/roachtest/tests/connection_latency.go @@ -37,7 +37,7 @@ func runConnectionLatencyTest( err := c.PutE(ctx, t.L(), t.DeprecatedWorkload(), "./workload") require.NoError(t, err) - settings := install.MakeClusterSettings(install.SecureOption(true)) + settings := install.MakeClusterSettings() // Don't start a backup schedule as this roachtest reports roachperf results. 
err = c.StartE(ctx, t.L(), option.DefaultStartOptsNoBackups(), settings) require.NoError(t, err) diff --git a/pkg/cmd/roachtest/tests/copyfrom.go b/pkg/cmd/roachtest/tests/copyfrom.go index ce347a009708..7ceb8967c969 100644 --- a/pkg/cmd/roachtest/tests/copyfrom.go +++ b/pkg/cmd/roachtest/tests/copyfrom.go @@ -22,6 +22,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" + "github.com/cockroachdb/cockroach/pkg/roachprod" "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" @@ -145,7 +146,7 @@ func runCopyFromCRDB(ctx context.Context, t test.Test, c cluster.Cluster, sf int db, err := c.ConnE(ctx, t.L(), 1) require.NoError(t, err) stmts := []string{ - "CREATE USER importer", + "CREATE USER importer WITH PASSWORD '123'", fmt.Sprintf("ALTER ROLE importer SET copy_from_atomic_enabled = %t", atomic), } for _, stmt := range stmts { @@ -154,7 +155,7 @@ func runCopyFromCRDB(ctx context.Context, t test.Test, c cluster.Cluster, sf int t.Fatal(err) } } - urls, err := c.InternalPGUrl(ctx, t.L(), c.Node(1), "" /* tenant */, 0 /* sqlInstance */) + urls, err := c.InternalPGUrl(ctx, t.L(), c.Node(1), roachprod.PGURLOptions{Auth: install.AuthUserPassword}) require.NoError(t, err) m := c.NewMonitor(ctx, c.All()) m.Go(func(ctx context.Context) error { @@ -162,10 +163,10 @@ func runCopyFromCRDB(ctx context.Context, t test.Test, c cluster.Cluster, sf int urlstr := strings.Replace(urls[0], "?", "/defaultdb?", 1) u, err := url.Parse(urlstr) require.NoError(t, err) - u.User = url.User("importer") + u.User = url.UserPassword("importer", "123") urlstr = u.String() - c.Run(ctx, option.WithNodes(c.Node(1)), fmt.Sprintf("psql %s -c 'SELECT 1'", urlstr)) - c.Run(ctx, option.WithNodes(c.Node(1)), fmt.Sprintf("psql %s -c '%s'", urlstr, 
lineitemSchema)) + c.Run(ctx, option.WithNodes(c.Node(1)), fmt.Sprintf("psql '%s' -c 'SELECT 1'", urlstr)) + c.Run(ctx, option.WithNodes(c.Node(1)), fmt.Sprintf("psql '%s' -c '%s'", urlstr, lineitemSchema)) runTest(ctx, t, c, fmt.Sprintf("psql '%s'", urlstr)) return nil }) diff --git a/pkg/cmd/roachtest/tests/decommission.go b/pkg/cmd/roachtest/tests/decommission.go index 7f7c3ba819fe..d5ae4bcf5484 100644 --- a/pkg/cmd/roachtest/tests/decommission.go +++ b/pkg/cmd/roachtest/tests/decommission.go @@ -25,7 +25,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" - "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/cockroachdb/cockroach/pkg/testutils" @@ -151,12 +150,7 @@ func runDrainAndDecommission( for i := 1; i <= nodes; i++ { c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings(), c.Node(i)) } - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Nodes(1)) - if err != nil { - t.Fatal(err) - } - c.Run(ctx, option.WithNodes(c.Node(pinnedNode)), - fmt.Sprintf(`./cockroach workload init kv --drop --splits 1000 '%s'`, pgurl)) + c.Run(ctx, option.WithNodes(c.Node(pinnedNode)), `./cockroach workload init kv --drop --splits 1000 {pgurl:1}`) run := func(stmt string) { db := c.Conn(ctx, t.L(), pinnedNode) @@ -206,11 +200,7 @@ func runDrainAndDecommission( m.Go(func() error { drain := func(id int) error { t.Status(fmt.Sprintf("draining node %d", id)) - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Node(id)) - if err != nil { - t.Fatal(err) - } - return c.RunE(ctx, option.WithNodes(c.Node(id)), fmt.Sprintf("./cockroach node drain --insecure --url=%s", pgurl)) + return c.RunE(ctx, option.WithNodes(c.Node(id)), fmt.Sprintf("./cockroach node drain 
--certs-dir=certs --port={pgport:%d}", id)) } return drain(id) }) @@ -224,11 +214,7 @@ func runDrainAndDecommission( id := nodes - 3 decom := func(id int) error { t.Status(fmt.Sprintf("decommissioning node %d", id)) - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Node(id)) - if err != nil { - t.Fatal(err) - } - return c.RunE(ctx, option.WithNodes(c.Node(id)), fmt.Sprintf("./cockroach node decommission --self --insecure --url=%s", pgurl)) + return c.RunE(ctx, option.WithNodes(c.Node(id)), fmt.Sprintf("./cockroach node decommission --self --certs-dir=certs --port={pgport:%d}", id)) } return decom(id) }) @@ -891,7 +877,7 @@ func runDecommissionRandomized(ctx context.Context, t test.Test, c cluster.Clust t.L().Printf("wiping n%d and adding it back to the cluster as a new node\n", targetNode) c.Stop(ctx, t.L(), option.DefaultStopOpts(), c.Node(targetNode)) - c.Wipe(ctx, false /*preserveCerts */, c.Node(targetNode)) + c.Wipe(ctx, true /*preserveCerts */, c.Node(targetNode)) joinNode := h.getRandNode() internalAddrs, err := c.InternalAddr(ctx, t.L(), c.Node(joinNode)) @@ -1136,13 +1122,9 @@ func runDecommissionSlow(ctx context.Context, t test.Test, c cluster.Cluster) { m.Go(func(ctx context.Context) error { decom := func(id int) error { t.Status(fmt.Sprintf("decommissioning node %d", id)) - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Node(id)) - if err != nil { - t.Fatal(err) - } return c.RunE(ctx, option.WithNodes(c.Node(id)), - fmt.Sprintf("./cockroach node decommission %d --insecure --checks=skip --url=%s", id, pgurl), + fmt.Sprintf("./cockroach node decommission %d --checks=skip --certs-dir=certs --port={pgport:%d}", id, id), ) } return decom(id) @@ -1479,12 +1461,8 @@ func execCLI( ) (string, error) { args := []string{"./cockroach"} args = append(args, extraArgs...) 
- args = append(args, "--insecure") - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Node(runNode)) - if err != nil { - t.Fatal(err) - } - args = append(args, fmt.Sprintf("--url=%s", pgurl)) + args = append(args, fmt.Sprintf("--port={pgport:%d}", runNode)) + args = append(args, "--certs-dir=certs") result, err := c.RunWithDetailsSingleNode(ctx, t.L(), option.WithNodes(c.Node(runNode)), args...) t.L().Printf("%s\n", result.Stdout) return result.Stdout, err diff --git a/pkg/cmd/roachtest/tests/decommissionbench.go b/pkg/cmd/roachtest/tests/decommissionbench.go index d36807b51ce7..6389fd536f30 100644 --- a/pkg/cmd/roachtest/tests/decommissionbench.go +++ b/pkg/cmd/roachtest/tests/decommissionbench.go @@ -16,7 +16,6 @@ import ( gosql "database/sql" "encoding/json" "fmt" - t "log" "math" "os" "path/filepath" @@ -79,7 +78,7 @@ type decommissionBenchSpec struct { drainFirst bool // When true, the test will add a node to the cluster prior to decommission, - // so that the upreplication will overlap with the the decommission. + // so that the upreplication will overlap with the decommission. whileUpreplicating bool // When true, attempts to simulate decommissioning a node with high read @@ -401,11 +400,8 @@ func setupDecommissionBench( t.Status(fmt.Sprintf("initializing cluster with %d warehouses", benchSpec.warehouses)) // Add the connection string here as the port is not decided until c.Start() is called. - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Nodes(1)) - if err != nil { - t.Fatal(err) - } - importCmd = fmt.Sprintf("%s '%s'", importCmd, pgurl) + + importCmd = fmt.Sprintf("%s {pgurl:1}", importCmd) c.Run(ctx, option.WithNodes(c.Node(pinnedNode)), importCmd) if benchSpec.snapshotRate != 0 { @@ -437,7 +433,7 @@ func setupDecommissionBench( } // Wait for initial up-replication. 
- err = WaitFor3XReplication(ctx, t, t.L(), db) + err := WaitFor3XReplication(ctx, t, t.L(), db) require.NoError(t, err) } } @@ -929,11 +925,7 @@ func runSingleDecommission( if drainFirst { h.t.Status(fmt.Sprintf("draining node%d", target)) - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, h.t.L(), c.Node(target)) - if err != nil { - t.Fatal(err) - } - cmd := fmt.Sprintf("./cockroach node drain --url=%s --self --insecure", pgurl) + cmd := fmt.Sprintf("./cockroach node drain --certs-dir=certs --port={pgport%s} --self", c.Node(target)) if err := h.c.RunE(ctx, option.WithNodes(h.c.Node(target)), cmd); err != nil { return err } diff --git a/pkg/cmd/roachtest/tests/disk_stall.go b/pkg/cmd/roachtest/tests/disk_stall.go index 944ac67ac8db..667e704c087e 100644 --- a/pkg/cmd/roachtest/tests/disk_stall.go +++ b/pkg/cmd/roachtest/tests/disk_stall.go @@ -153,7 +153,7 @@ func runDiskStalledDetection( } stallAt := timeutil.Now() - response := mustGetMetrics(ctx, t, adminURL, workloadStartAt, stallAt, []tsQuery{ + response := mustGetMetrics(ctx, c, t, adminURL, workloadStartAt, stallAt, []tsQuery{ {name: "cr.node.sql.query.count", queryType: total}, }) cum := response.Results[0].Datapoints @@ -205,7 +205,7 @@ func runDiskStalledDetection( { now := timeutil.Now() - response := mustGetMetrics(ctx, t, adminURL, workloadStartAt, now, []tsQuery{ + response := mustGetMetrics(ctx, c, t, adminURL, workloadStartAt, now, []tsQuery{ {name: "cr.node.sql.query.count", queryType: total}, }) cum := response.Results[0].Datapoints diff --git a/pkg/cmd/roachtest/tests/django.go b/pkg/cmd/roachtest/tests/django.go index 0a7b0e0febe1..5f2139945f31 100644 --- a/pkg/cmd/roachtest/tests/django.go +++ b/pkg/cmd/roachtest/tests/django.go @@ -230,7 +230,7 @@ source venv/bin/activate && cd /mnt/data1/django/tests && python3 runtests.py %[1]s --settings cockroach_settings -v 2 > %[1]s.stdout ` -const cockroachDjangoSettings = ` +var cockroachDjangoSettings = fmt.Sprintf(` from django.test.runner import 
DiscoverRunner @@ -238,16 +238,16 @@ DATABASES = { 'default': { 'ENGINE': 'django_cockroachdb', 'NAME': 'django_tests', - 'USER': 'test_admin', - 'PASSWORD': '', + 'USER': '%[1]s', + 'PASSWORD': '%[2]s', 'HOST': 'localhost', 'PORT': {pgport:1}, }, 'other': { 'ENGINE': 'django_cockroachdb', 'NAME': 'django_tests2', - 'USER': 'root', - 'PASSWORD': '', + 'USER': '%[1]s', + 'PASSWORD': '%[2]s', 'HOST': 'localhost', 'PORT': {pgport:1}, }, @@ -269,4 +269,4 @@ class NonDescribingDiscoverRunner(DiscoverRunner): } USE_TZ = False -` +`, install.DefaultUser, install.DefaultPassword) diff --git a/pkg/cmd/roachtest/tests/drain.go b/pkg/cmd/roachtest/tests/drain.go index 8ca2d89ab824..14f2fe9bbf04 100644 --- a/pkg/cmd/roachtest/tests/drain.go +++ b/pkg/cmd/roachtest/tests/drain.go @@ -26,9 +26,9 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" + "github.com/cockroachdb/cockroach/pkg/roachprod" "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/cockroachdb/cockroach/pkg/testutils" - "github.com/cockroachdb/cockroach/pkg/util/httputil" "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/errors" "github.com/jackc/pgx/v4" @@ -127,10 +127,6 @@ func runEarlyExitInConnectionWait(ctx context.Context, t test.Test, c cluster.Cl m.Go(func(ctx context.Context) error { t.Status(fmt.Sprintf("start draining node %d", nodeToDrain)) - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Node(nodeToDrain)) - if err != nil { - t.Fatal(err) - } results, err := c.RunWithDetailsSingleNode( ctx, t.L(), @@ -140,7 +136,7 @@ func runEarlyExitInConnectionWait(ctx context.Context, t test.Test, c cluster.Cl // of server.shutdown.initial_wait, server.shutdown.connections.timeout, // server.shutdown.transactions.timeout times two, and // server.shutdown.lease_transfer_iteration.timeout. 
- fmt.Sprintf("./cockroach node drain --self --insecure --drain-wait=10s --url=%s", pgurl), + fmt.Sprintf("./cockroach node drain --self --drain-wait=10s --certs-dir=certs --port={pgport:%d}", nodeToDrain), ) if err != nil { return err @@ -171,8 +167,9 @@ func runEarlyExitInConnectionWait(ctx context.Context, t test.Test, c cluster.Cl require.NoError(t, err) addr, err := c.ExternalAdminUIAddr(ctx, t.L(), c.Node(nodeToDrain)) require.NoError(t, err) - url := `http://` + addr[0] + `/health?ready=1` - resp, err := httputil.Get(ctx, url) + url := `https://` + addr[0] + `/health?ready=1` + client := roachtestutil.DefaultHTTPClient(c, t.L()) + resp, err := client.Get(ctx, url) require.NoError(t, err) defer func() { _ = resp.Body.Close() }() require.Equalf(t, http.StatusServiceUnavailable, resp.StatusCode, "expected healthcheck to fail during drain") @@ -248,7 +245,7 @@ func runWarningForConnWait(ctx context.Context, t test.Test, c cluster.Cluster) prepareCluster(ctx, t, c, drainWaitDuration, connectionWaitDuration, queryWaitDuration) - pgURL, err := c.ExternalPGUrl(ctx, t.L(), c.Node(nodeToDrain), "" /* tenant */, 0 /* sqlInstance */) + pgURL, err := c.ExternalPGUrl(ctx, t.L(), c.Node(nodeToDrain), roachprod.PGURLOptions{}) require.NoError(t, err) connNoTxn, err := pgx.Connect(ctx, pgURL[0]) require.NoError(t, err) @@ -260,13 +257,9 @@ func runWarningForConnWait(ctx context.Context, t test.Test, c cluster.Cluster) m := c.NewMonitor(ctx, c.Node(nodeToDrain)) m.Go(func(ctx context.Context) error { t.Status(fmt.Sprintf("draining node %d", nodeToDrain)) - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Node(nodeToDrain)) - if err != nil { - t.Fatal(err) - } return c.RunE(ctx, option.WithNodes(c.Node(nodeToDrain)), - fmt.Sprintf("./cockroach node drain --self --insecure --drain-wait=600s --url=%s", pgurl), + fmt.Sprintf("./cockroach node drain --self --drain-wait=600s --certs-dir=certs --port={pgport:%d}", nodeToDrain), ) }) @@ -341,15 +334,11 @@ func 
runClusterNotAtQuorum(ctx context.Context, t test.Test, c cluster.Cluster) c.Stop(ctx, t.L(), stopOpts, c.Node(2)) t.Status("start draining node 3") - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Node(3)) - if err != nil { - t.Fatal(err) - } // Ignore the error, since the command is expected to time out. results, _ := c.RunWithDetailsSingleNode( ctx, t.L(), - option.WithNodes(c.Node(3)), fmt.Sprintf("./cockroach node drain --self --insecure --drain-wait=10s --url=%s", pgurl)) + option.WithNodes(c.Node(3)), "./cockroach node drain --self --drain-wait=10s --certs-dir=certs --port={pgport:3}") t.L().Printf("drain output:\n%s\n%s\n", results.Stdout, results.Stderr) require.Regexp(t, "(cluster settings require a value of at least|could not check drain related cluster settings)", results.Stderr) } diff --git a/pkg/cmd/roachtest/tests/drop.go b/pkg/cmd/roachtest/tests/drop.go index 1eebfc81a990..777b6a713632 100644 --- a/pkg/cmd/roachtest/tests/drop.go +++ b/pkg/cmd/roachtest/tests/drop.go @@ -43,11 +43,7 @@ func registerDrop(r registry.Registry) { m := c.NewMonitor(ctx, c.Range(1, nodes)) m.Go(func(ctx context.Context) error { t.WorkerStatus("importing TPCC fixture") - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Nodes(1)) - if err != nil { - t.Fatal(err) - } - c.Run(ctx, option.WithNodes(c.Node(1)), tpccImportCmd("", warehouses, pgurl)) + c.Run(ctx, option.WithNodes(c.Node(1)), tpccImportCmd("", warehouses, "{pgurl:1}")) // Don't open the DB connection until after the data has been imported. 
// Otherwise the ALTER TABLE query below might fail to find the diff --git a/pkg/cmd/roachtest/tests/follower_reads.go b/pkg/cmd/roachtest/tests/follower_reads.go index 617ecd30d3da..96aad12ef2d5 100644 --- a/pkg/cmd/roachtest/tests/follower_reads.go +++ b/pkg/cmd/roachtest/tests/follower_reads.go @@ -16,7 +16,6 @@ import ( gosql "database/sql" "fmt" "math/rand" - "net/http" "reflect" "regexp" "strconv" @@ -26,6 +25,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil/mixedversion" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" @@ -35,7 +35,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" "github.com/cockroachdb/cockroach/pkg/ts/tspb" - "github.com/cockroachdb/cockroach/pkg/util/httputil" "github.com/cockroachdb/cockroach/pkg/util/retry" "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/errors" @@ -685,7 +684,7 @@ func verifySQLLatency( if err != nil { t.Fatal(err) } - url := "http://" + adminURLs[0] + "/ts/query" + url := "https://" + adminURLs[0] + "/ts/query" var sources []string for i := range liveNodes { sources = append(sources, strconv.Itoa(i)) @@ -701,8 +700,9 @@ func verifySQLLatency( SourceAggregator: tspb.TimeSeriesQueryAggregator_MAX.Enum(), }}, } + client := roachtestutil.DefaultHTTPClient(c, t.L()) var response tspb.TimeSeriesQueryResponse - if err := httputil.PostProtobuf(ctx, http.Client{}, url, &request, &response); err != nil { + if err := client.PostProtobuf(ctx, url, &request, &response); err != nil { t.Fatal(err) } perTenSeconds := response.Results[0].Datapoints @@ -751,7 +751,7 @@ func 
verifyHighFollowerReadRatios( } adminURLs, err := c.ExternalAdminUIAddr(ctx, t.L(), c.Node(adminNode)) require.NoError(t, err) - url := "http://" + adminURLs[0] + "/ts/query" + url := "https://" + adminURLs[0] + "/ts/query" request := tspb.TimeSeriesQueryRequest{ StartNanos: start.UnixNano(), EndNanos: end.UnixNano(), @@ -771,9 +771,9 @@ func verifyHighFollowerReadRatios( Derivative: tspb.TimeSeriesQueryDerivative_NON_NEGATIVE_DERIVATIVE.Enum(), }) } - + client := roachtestutil.DefaultHTTPClient(c, t.L()) var response tspb.TimeSeriesQueryResponse - if err := httputil.PostProtobuf(ctx, http.Client{}, url, &request, &response); err != nil { + if err := client.PostProtobuf(ctx, url, &request, &response); err != nil { t.Fatal(err) } @@ -860,7 +860,8 @@ func getFollowerReadCounts(ctx context.Context, t test.Test, c cluster.Cluster) return err } url := "http://" + adminUIAddrs[0] + "/_status/vars" - resp, err := httputil.Get(ctx, url) + client := roachtestutil.DefaultHTTPClient(c, t.L()) + resp, err := client.Get(ctx, url) if err != nil { return err } @@ -927,6 +928,7 @@ func parsePrometheusMetric(s string) (*prometheusMetric, bool) { func runFollowerReadsMixedVersionSingleRegionTest( ctx context.Context, t test.Test, c cluster.Cluster, ) { + topology := topologySpec{multiRegion: false} runFollowerReadsMixedVersionTest(ctx, t, c, topology, exactStaleness) } @@ -961,14 +963,6 @@ func runFollowerReadsMixedVersionTest( rc readConsistency, opts ...mixedversion.CustomOption, ) { - // The http requests to the admin UI performed by the test don't play - // well with secure clusters. As of the time of writing, they return - // either of the following errors: - // tls: failed to verify certificate: x509: “node” certificate is not standards compliant - // tls: failed to verify certificate: x509: certificate signed by unknown authority - // - // Disable secure mode for simplicity. 
- opts = append(opts, mixedversion.ClusterSettingOption(install.SecureOption(false))) mvt := mixedversion.NewTest(ctx, t, t.L(), c, c.All(), opts...) var data map[int]int64 diff --git a/pkg/cmd/roachtest/tests/gopg.go b/pkg/cmd/roachtest/tests/gopg.go index e43c472ff23b..20ba713a42cf 100644 --- a/pkg/cmd/roachtest/tests/gopg.go +++ b/pkg/cmd/roachtest/tests/gopg.go @@ -53,7 +53,12 @@ func registerGopg(r registry.Registry) { } node := c.Node(1) t.Status("setting up cockroach") - c.Start(ctx, t.L(), option.DefaultStartOptsInMemory(), install.MakeClusterSettings(), c.All()) + // go-pg does not support reading in the password from the environment + // in v10.9.0. + // See: https://github.com/go-pg/pg/pull/1996 + // TODO(darrylwong): once the above change is part of a release, + // upgrade support to that version and enable secure mode. + c.Start(ctx, t.L(), option.DefaultStartOptsInMemory(), install.MakeClusterSettings(install.SecureOption(false)), c.All()) version, err := fetchCockroachVersion(ctx, t.L(), c, node[0]) if err != nil { t.Fatal(err) diff --git a/pkg/cmd/roachtest/tests/gorm.go b/pkg/cmd/roachtest/tests/gorm.go index 6d90307f7bee..f2587191e4ff 100644 --- a/pkg/cmd/roachtest/tests/gorm.go +++ b/pkg/cmd/roachtest/tests/gorm.go @@ -18,7 +18,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" - "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/stretchr/testify/require" @@ -105,11 +104,7 @@ func registerGORM(r registry.Registry) { ignorelistName, ignoredFailures := "gormIgnorelist", gormIgnorelist t.L().Printf("Running cockroach version %s, using blocklist %s, using ignorelist %s", version, blocklistName, ignorelistName) - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), 
c.Node(1)) - if err != nil { - t.Fatal(err) - } - err = c.RunE(ctx, option.WithNodes(node), fmt.Sprintf(`./cockroach sql -e "CREATE DATABASE gorm" --insecure --url=%s`, pgurl)) + err = c.RunE(ctx, option.WithNodes(node), `./cockroach sql -e "CREATE DATABASE gorm" --url={pgurl:1}`) require.NoError(t, err) t.Status("downloading go dependencies for tests") @@ -130,9 +125,9 @@ func registerGORM(r registry.Registry) { ctx, option.WithNodes(node), fmt.Sprintf(`cd %s && rm migrate_test.go && - GORM_DIALECT="postgres" GORM_DSN="user=test_admin password= dbname=gorm host=localhost port={pgport:1} sslmode=disable" + GORM_DIALECT="postgres" GORM_DSN="user=%s password=%s dbname=gorm host=localhost port={pgport:1} sslmode=require" go test -v ./... 2>&1 | %s/bin/go-junit-report > %s`, - gormTestPath, goPath, resultsPath), + gormTestPath, install.DefaultUser, install.DefaultPassword, goPath, resultsPath), ) if err != nil { t.L().Printf("error whilst running tests (may be expected): %#v", err) diff --git a/pkg/cmd/roachtest/tests/gossip.go b/pkg/cmd/roachtest/tests/gossip.go index 985a4fdf8edc..3018b7ae23bd 100644 --- a/pkg/cmd/roachtest/tests/gossip.go +++ b/pkg/cmd/roachtest/tests/gossip.go @@ -15,7 +15,6 @@ import ( gosql "database/sql" "fmt" "net" - "net/http" "net/url" "sort" "strconv" @@ -25,12 +24,13 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/gossip" + "github.com/cockroachdb/cockroach/pkg/roachprod" "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/cockroachdb/cockroach/pkg/roachprod/logger" "github.com/cockroachdb/cockroach/pkg/util" - "github.com/cockroachdb/cockroach/pkg/util/httputil" "github.com/cockroachdb/cockroach/pkg/util/retry" 
"github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/errors" @@ -153,9 +153,10 @@ SELECT node_id } type gossipUtil struct { - waitTime time.Duration - urlMap map[int]string - conn func(ctx context.Context, l *logger.Logger, i int, opts ...func(*option.ConnOption)) *gosql.DB + waitTime time.Duration + urlMap map[int]string + conn func(ctx context.Context, l *logger.Logger, i int, opts ...func(*option.ConnOption)) *gosql.DB + httpClient *roachtestutil.RoachtestHTTPClient } func newGossipUtil(ctx context.Context, t test.Test, c cluster.Cluster) *gossipUtil { @@ -168,9 +169,10 @@ func newGossipUtil(ctx context.Context, t test.Test, c cluster.Cluster) *gossipU urlMap[i+1] = `http://` + addr } return &gossipUtil{ - waitTime: 30 * time.Second, - urlMap: urlMap, - conn: c.Conn, + waitTime: 30 * time.Second, + urlMap: urlMap, + conn: c.Conn, + httpClient: roachtestutil.DefaultHTTPClient(c, t.L()), } } @@ -179,12 +181,14 @@ type checkGossipFunc func(map[string]gossip.Info) error // checkGossip fetches the gossip infoStore from each node and invokes the // given function. The test passes if the function returns 0 for every node, // retrying for up to the given duration. 
-func (g *gossipUtil) check(ctx context.Context, c cluster.Cluster, f checkGossipFunc) error { +func (g *gossipUtil) check( + ctx context.Context, c cluster.Cluster, f checkGossipFunc, l *logger.Logger, +) error { return retry.ForDuration(g.waitTime, func() error { var infoStatus gossip.InfoStatus for i := 1; i <= c.Spec().NodeCount; i++ { url := g.urlMap[i] + `/_status/gossip/local` - if err := httputil.GetJSON(http.Client{}, url, &infoStatus); err != nil { + if err := g.httpClient.GetJSON(ctx, url, &infoStatus); err != nil { return errors.Wrapf(err, "failed to get gossip status from node %d", i) } if err := f(infoStatus.Infos); err != nil { @@ -233,13 +237,13 @@ func (g *gossipUtil) checkConnectedAndFunctional( ctx context.Context, t test.Test, c cluster.Cluster, ) { t.L().Printf("waiting for gossip to be connected\n") - if err := g.check(ctx, c, g.hasPeers(c.Spec().NodeCount)); err != nil { + if err := g.check(ctx, c, g.hasPeers(c.Spec().NodeCount), t.L()); err != nil { t.Fatal(err) } - if err := g.check(ctx, c, g.hasClusterID); err != nil { + if err := g.check(ctx, c, g.hasClusterID, t.L()); err != nil { t.Fatal(err) } - if err := g.check(ctx, c, g.hasSentinel); err != nil { + if err := g.check(ctx, c, g.hasSentinel, t.L()); err != nil { t.Fatal(err) } @@ -287,13 +291,13 @@ func runGossipPeerings(ctx context.Context, t test.Test, c cluster.Cluster) { for i := 1; timeutil.Now().Before(deadline); i++ { WaitForReady(ctx, t, c, c.All()) - if err := g.check(ctx, c, g.hasPeers(c.Spec().NodeCount)); err != nil { + if err := g.check(ctx, c, g.hasPeers(c.Spec().NodeCount), t.L()); err != nil { t.Fatal(err) } - if err := g.check(ctx, c, g.hasClusterID); err != nil { + if err := g.check(ctx, c, g.hasClusterID, t.L()); err != nil { t.Fatal(err) } - if err := g.check(ctx, c, g.hasSentinel); err != nil { + if err := g.check(ctx, c, g.hasSentinel, t.L()); err != nil { t.Fatal(err) } t.L().Printf("%d: OK\n", i) @@ -327,8 +331,9 @@ func runGossipRestart(ctx context.Context, t 
test.Test, c cluster.Cluster) { t.L().Printf("%d: killing all nodes\n", i) c.Stop(ctx, t.L(), option.DefaultStopOpts()) - t.L().Printf("%d: restarting all nodes\n", i) + // Tell the httpClient our saved session cookies are no longer valid after a restart. + g.httpClient.ResetSession() c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings()) } } @@ -434,10 +439,11 @@ SELECT count(replicas) // connections. This will require node 1 to reach out to the other nodes in // the cluster for gossip info. err = c.RunE(ctx, option.WithNodes(c.Node(1)), - ` ./cockroach start --insecure --background --store={store-dir} `+ + ` ./cockroach start --background --store={store-dir} `+ `--log-dir={log-dir} --cache=10% --max-sql-memory=10% `+ fmt.Sprintf(`--listen-addr=:$[{pgport:1}+1000] --http-port=%d `, adminPorts[0])+ `--join={pghost:1}:{pgport:1} `+ + `--certs-dir=certs `+ `--advertise-addr={pghost:1}:$[{pgport:1}+1000] `+ `> {log-dir}/cockroach.stdout 2> {log-dir}/cockroach.stderr`) if err != nil { @@ -460,7 +466,7 @@ SELECT count(replicas) if i != 1 { return c.Conn(ctx, l, i) } - urls, err := c.ExternalPGUrl(ctx, l, c.Node(1), "" /* tenant */, 0 /* sqlInstance */) + urls, err := c.ExternalPGUrl(ctx, l, c.Node(1), roachprod.PGURLOptions{}) if err != nil { t.Fatal(err) } diff --git a/pkg/cmd/roachtest/tests/hibernate.go b/pkg/cmd/roachtest/tests/hibernate.go index b3e22436fd27..6fb6b44dab21 100644 --- a/pkg/cmd/roachtest/tests/hibernate.go +++ b/pkg/cmd/roachtest/tests/hibernate.go @@ -96,7 +96,8 @@ func registerHibernate(r registry.Registry, opt hibernateOptions) { t.Status("setting up cockroach") startOpts := option.DefaultStartOptsInMemory() startOpts.RoachprodOpts.SQLPort = config.DefaultSQLPort - c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings(), c.All()) + // Hibernate uses a hardcoded connection string with ssl disabled. 
+ c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings(install.SecureOption(false)), c.All()) if opt.dbSetupFunc != nil { opt.dbSetupFunc(ctx, t, c) diff --git a/pkg/cmd/roachtest/tests/import.go b/pkg/cmd/roachtest/tests/import.go index a115ef260a31..72fadc5b9081 100644 --- a/pkg/cmd/roachtest/tests/import.go +++ b/pkg/cmd/roachtest/tests/import.go @@ -141,15 +141,11 @@ func registerImportTPCC(r registry.Registry) { m.Go(hc.Runner) tick, perfBuf := initBulkJobPerfArtifacts(testName, timeout) - workloadStr := `./cockroach workload fixtures import tpcc --warehouses=%d --csv-server='http://localhost:8081' '%s'` + workloadStr := `./cockroach workload fixtures import tpcc --warehouses=%d --csv-server='http://localhost:8081' {pgurl:1}` m.Go(func(ctx context.Context) error { defer dul.Done() defer hc.Done() - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Nodes(1)) - if err != nil { - t.Fatal(err) - } - cmd := fmt.Sprintf(workloadStr, warehouses, pgurl) + cmd := fmt.Sprintf(workloadStr, warehouses) // Tick once before starting the import, and once after to capture the // total elapsed time. This is used by roachperf to compute and display // the average MB/sec per node. @@ -359,22 +355,13 @@ func registerImportDecommissioned(r registry.Registry) { // Decommission a node. nodeToDecommission := 2 t.Status(fmt.Sprintf("decommissioning node %d", nodeToDecommission)) - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Node(nodeToDecommission)) - if err != nil { - t.Fatal(err) - } - c.Run(ctx, option.WithNodes(c.Node(nodeToDecommission)), - fmt.Sprintf(`./cockroach node decommission --insecure --self --wait=all --url=%s`, pgurl)) + c.Run(ctx, option.WithNodes(c.Node(nodeToDecommission)), `./cockroach node decommission --self --wait=all --url={pgurl:2}`) // Wait for a bit for node liveness leases to expire. 
time.Sleep(10 * time.Second) t.Status("running workload") - pgurl, err = roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Nodes(1)) - if err != nil { - t.Fatal(err) - } - c.Run(ctx, option.WithNodes(c.Node(1)), tpccImportCmd("", warehouses, pgurl)) + c.Run(ctx, option.WithNodes(c.Node(1)), tpccImportCmd("", warehouses, "{pgurl:1}")) } r.Add(registry.TestSpec{ diff --git a/pkg/cmd/roachtest/tests/jasyncsql.go b/pkg/cmd/roachtest/tests/jasyncsql.go index d04ee8823059..8f0a81233e96 100644 --- a/pkg/cmd/roachtest/tests/jasyncsql.go +++ b/pkg/cmd/roachtest/tests/jasyncsql.go @@ -30,7 +30,10 @@ func registerJasyncSQL(r registry.Registry) { } node := c.Node(1) t.Status("setting up cockroach") - c.Start(ctx, t.L(), option.DefaultStartOptsInMemory(), install.MakeClusterSettings(), c.All()) + // jasync does not support changing the default sslmode for postgresql, defaulting + // sslmode=disable. See: https://github.com/jasync-sql/jasync-sql/issues/422 + // TODO(darrylwong): If the above issue is addressed we can enable secure mode + c.Start(ctx, t.L(), option.DefaultStartOptsInMemory(), install.MakeClusterSettings(install.SecureOption(false)), c.All()) version, err := fetchCockroachVersion(ctx, t.L(), c, node[0]) if err != nil { @@ -93,7 +96,7 @@ func registerJasyncSQL(r registry.Registry) { _ = c.RunE( ctx, option.WithNodes(node), - `cd /mnt/data1/jasync-sql && PGUSER=test_admin PGHOST=localhost PGPORT={pgport:1} PGDATABASE=defaultdb ./gradlew :postgresql-async:test`, + `cd /mnt/data1/jasync-sql && PGHOST=localhost PGUSER=test_admin PGPORT={pgport:1} PGDATABASE=defaultdb ./gradlew :postgresql-async:test`, ) _ = c.RunE(ctx, option.WithNodes(node), `mkdir -p ~/logs/report/jasyncsql-results`) diff --git a/pkg/cmd/roachtest/tests/knex.go b/pkg/cmd/roachtest/tests/knex.go index 3510643e0c2f..553d64f4d3ac 100644 --- a/pkg/cmd/roachtest/tests/knex.go +++ b/pkg/cmd/roachtest/tests/knex.go @@ -12,13 +12,13 @@ package tests import ( "context" + "fmt" "strings" 
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" - "github.com/cockroachdb/cockroach/pkg/roachprod/config" "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/stretchr/testify/require" ) @@ -41,9 +41,7 @@ func registerKnex(r registry.Registry) { } node := c.Node(1) t.Status("setting up cockroach") - startOpts := option.DefaultStartOptsInMemory() - startOpts.RoachprodOpts.SQLPort = config.DefaultSQLPort - c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings(), c.All()) + c.Start(ctx, t.L(), option.DefaultStartOptsInMemory(), install.MakeClusterSettings(), c.All()) version, err := fetchCockroachVersion(ctx, t.L(), c, node[0]) require.NoError(t, err) @@ -57,7 +55,7 @@ func registerKnex(r registry.Registry) { c, node, "create sql database", - `./cockroach sql --insecure -e "CREATE DATABASE test"`, + `./cockroach sql --url={pgurl:1} -e "CREATE DATABASE test"`, ) require.NoError(t, err) @@ -121,12 +119,18 @@ echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.co ) require.NoError(t, err) + // Write the knexfile test config into the test suite to use. + // The default test config does not support ssl connections. 
+ err = c.PutString(ctx, knexfile, "/mnt/data1/knex/knexfile.js", 0755, c.Node(1)) + require.NoError(t, err) + t.Status("running knex tests") result, err := c.RunWithDetailsSingleNode( ctx, t.L(), option.WithNodes(node), - `cd /mnt/data1/knex/ && DB='cockroachdb' npm test`, + fmt.Sprintf(`cd /mnt/data1/knex/ && PGUSER=%s PGPASSWORD=%s PGPORT={pgport:1} PGSSLROOTCERT=$HOME/certs/ca.crt \ + KNEX_TEST='/mnt/data1/knex/knexfile.js' DB='cockroachdb' npm test`, install.DefaultUser, install.DefaultPassword), ) rawResultsStr := result.Stdout + result.Stderr t.L().Printf("Test Results: %s", rawResultsStr) @@ -157,3 +161,41 @@ echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.co }, }) } + +const knexfile = ` +'use strict'; +/* eslint no-var: 0 */ + +const _ = require('lodash'); + +console.log('Using custom cockroachdb test config'); + +const testIntegrationDialects = ( + process.env.DB || + 'cockroachdb' +).match(/[\w-]+/g); + +const testConfigs = { + cockroachdb: { + adapter: 'cockroachdb', + port: process.env.PGPORT, + host: 'localhost', + database: 'test', + user: process.env.PGUSER, + password: process.env.PGPASSWORD, + ssl: { + rejectUnauthorized: false, + ca: process.env.PGSSLROOTCERT + } + }, +}; + +module.exports = _.reduce( + testIntegrationDialects, + function (res, dialectName) { + res[dialectName] = testConfigs[dialectName]; + return res; + }, + {} +); +` diff --git a/pkg/cmd/roachtest/tests/kv.go b/pkg/cmd/roachtest/tests/kv.go index 48c56c81e245..7943dda30c1c 100644 --- a/pkg/cmd/roachtest/tests/kv.go +++ b/pkg/cmd/roachtest/tests/kv.go @@ -25,7 +25,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" - "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" 
"github.com/cockroachdb/cockroach/pkg/kv" @@ -95,7 +94,7 @@ func registerKV(r registry.Registry) { startOpts.RoachprodOpts.StoreCount = opts.ssds } // Use a secure cluster so we can test with a non-root user. - settings := install.MakeClusterSettings(install.SecureOption(true)) + settings := install.MakeClusterSettings() if opts.globalMVCCRangeTombstone { settings.Env = append(settings.Env, "COCKROACH_GLOBAL_MVCC_RANGE_TOMBSTONE=true") } @@ -119,16 +118,7 @@ func registerKV(r registry.Registry) { } } if opts.sharedProcessMT { - createInMemoryTenant(ctx, t, c, appTenantName, c.Range(1, nodes), false /* secure */) - } - - // Create a user and grant them admin privileges so they can freely - // interact with the cluster. - if _, err := db.ExecContext(ctx, `CREATE USER testuser WITH PASSWORD 'password'`); err != nil { - t.Fatal(err) - } - if _, err := db.ExecContext(ctx, `GRANT admin TO testuser`); err != nil { - t.Fatal(err) + createInMemoryTenant(ctx, t, c, appTenantName, c.Range(1, nodes), true /* secure */) } t.Status("running workload") @@ -181,7 +171,9 @@ func registerKV(r registry.Registry) { if opts.sharedProcessMT { url = fmt.Sprintf(" {pgurl:1-%d:%s}", nodes, appTenantName) } - cmd := "./workload run kv --tolerate-errors --init --user=testuser --password=password" + + cmd := fmt.Sprintf( + "./workload run kv --tolerate-errors --init --user=%s --password=%s", install.DefaultUser, install.DefaultPassword, + ) + histograms + concurrency + splits + duration + readPercent + batchSize + blockSize + sequential + envFlags + url c.Run(ctx, option.WithNodes(c.Node(nodes+1)), cmd) @@ -541,12 +533,7 @@ func registerKVGracefulDraining(r registry.Registry) { // Initialize the database with a lot of ranges so that there are // definitely a large number of leases on the node that we shut down // before it starts draining. 
- pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Nodes(1)) - if err != nil { - t.Fatal(err) - } - c.Run(ctx, option.WithNodes(c.Node(1)), - fmt.Sprintf("./cockroach workload init kv --splits 100 '%s'", pgurl)) + c.Run(ctx, option.WithNodes(c.Node(1)), "./cockroach workload init kv --splits 100 {pgurl:1}") m := c.NewMonitor(ctx, c.Nodes(1, nodes)) m.ExpectDeath() diff --git a/pkg/cmd/roachtest/tests/libpq.go b/pkg/cmd/roachtest/tests/libpq.go index 76022bf25165..98d2bafebc3d 100644 --- a/pkg/cmd/roachtest/tests/libpq.go +++ b/pkg/cmd/roachtest/tests/libpq.go @@ -104,7 +104,9 @@ func registerLibPQ(r registry.Registry) { result, err := c.RunWithDetailsSingleNode( ctx, t.L(), option.WithNodes(node), - fmt.Sprintf(`cd %s && PGPORT={pgport:1} PGUSER=root PGSSLMODE=disable PGDATABASE=postgres go test -list "%s"`, libPQPath, testListRegex), + fmt.Sprintf( + `cd %s && PGPORT={pgport:1} PGUSER=%s PGPASSWORD=%s PGSSLMODE=require PGDATABASE=postgres go test -list "%s"`, + libPQPath, install.DefaultUser, install.DefaultPassword, testListRegex), ) require.NoError(t, err) @@ -131,8 +133,8 @@ func registerLibPQ(r registry.Registry) { _ = c.RunE( ctx, option.WithNodes(node), - fmt.Sprintf("cd %s && PGPORT={pgport:1} PGUSER=test_admin PGSSLMODE=disable PGDATABASE=postgres go test -run %s -v 2>&1 | %s/bin/go-junit-report > %s", - libPQPath, allowedTestsRegExp, goPath, resultsPath), + fmt.Sprintf("cd %s && PGPORT={pgport:1} PGUSER=%s PGPASSWORD=%s PGSSLMODE=require PGDATABASE=postgres go test -run %s -v 2>&1 | %s/bin/go-junit-report > %s", + libPQPath, install.DefaultUser, install.DefaultPassword, allowedTestsRegExp, goPath, resultsPath), ) parseAndSummarizeJavaORMTestsResults( diff --git a/pkg/cmd/roachtest/tests/liquibase.go b/pkg/cmd/roachtest/tests/liquibase.go index a515857ad5dc..2800064f8e33 100644 --- a/pkg/cmd/roachtest/tests/liquibase.go +++ b/pkg/cmd/roachtest/tests/liquibase.go @@ -38,7 +38,8 @@ func registerLiquibase(r registry.Registry) { t.Status("setting up 
cockroach") startOpts := option.DefaultStartOpts() startOpts.RoachprodOpts.SQLPort = config.DefaultSQLPort - c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings(), c.All()) + // TODO(darrylwong): if https://github.com/liquibase/liquibase-test-harness/pull/724 is merged, enable secure mode + c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings(install.SecureOption(false)), c.All()) version, err := fetchCockroachVersion(ctx, t.L(), c, node[0]) if err != nil { @@ -98,6 +99,7 @@ func registerLiquibase(r registry.Registry) { if err = c.RunE(ctx, option.WithNodes(node), `sudo mkdir /cockroach && sudo ln -sf /home/ubuntu/cockroach /cockroach/cockroach.sh`); err != nil { t.Fatal(err) } + // TODO(darrylwong): once secure mode is enabled, add --certs-dir=certs if err = c.RunE(ctx, option.WithNodes(node), `/mnt/data1/liquibase-test-harness/src/test/resources/docker/setup_db.sh localhost`); err != nil { t.Fatal(err) } @@ -112,6 +114,7 @@ func registerLiquibase(r registry.Registry) { resultsPath = repoDir + "/target/surefire-reports/TEST-liquibase.harness.LiquibaseHarnessSuiteTest.xml" ) + // TODO(darrylwong): once secure mode is enabled, add -DdbUsername=roach -DdbPassword=system cmd := fmt.Sprintf("cd /mnt/data1/liquibase-test-harness/ && "+ "mvn surefire-report:report-only test -Dtest=LiquibaseHarnessSuiteTest "+ "-DdbName=cockroachdb -DdbVersion=20.2 -DoutputDirectory=%s", repoDir) diff --git a/pkg/cmd/roachtest/tests/loss_of_quorum_recovery.go b/pkg/cmd/roachtest/tests/loss_of_quorum_recovery.go index 4edcba146083..8422cbb89d84 100644 --- a/pkg/cmd/roachtest/tests/loss_of_quorum_recovery.go +++ b/pkg/cmd/roachtest/tests/loss_of_quorum_recovery.go @@ -305,7 +305,7 @@ func runRecoverLossOfQuorum(ctx context.Context, t test.Test, c cluster.Cluster, if err := timeutil.RunWithTimeout(ctx, "mark-nodes-decommissioned", 5*time.Minute, func(ctx context.Context) error { decommissionCmd := fmt.Sprintf( - "./cockroach node decommission --wait none --insecure 
--url={pgurl:%d} 2 3", 1) + "./cockroach node decommission --wait none --url={pgurl:%d} 2 3", 1) return c.RunE(ctx, option.WithNodes(c.Node(controller)), decommissionCmd) }); err != nil { // Timeout means we failed to recover ranges especially system ones @@ -331,8 +331,8 @@ func runRecoverLossOfQuorum(ctx context.Context, t test.Test, c cluster.Cluster, if err := timeutil.RunWithTimeout(ctx, "decommission-removed-nodes", 5*time.Minute, func(ctx context.Context) error { decommissionCmd := fmt.Sprintf( - "./cockroach node decommission --wait all --insecure --url={pgurl:%d} 2 3", 1) - return c.RunE(ctx, option.WithNodes(c.Node(controller)), decommissionCmd) + "./cockroach node decommission --wait all --url={pgurl:%d} 2 3", 1) + return c.RunE(ctx, option.WithNodes(c.Nodes(controller)), decommissionCmd) }); err != nil { // Timeout means we failed to drain all ranges from failed nodes, possibly // because some ranges were not recovered. @@ -447,7 +447,7 @@ func runHalfOnlineRecoverLossOfQuorum( require.NoError(t, err, "infra failure, can't get IP addr of cluster node") require.NotEmpty(t, addrs, "infra failure, can't get IP addr of cluster node") addr := addrs[0] - planCmd := "./cockroach debug recover make-plan --confirm y --insecure --host " + addr + " -o " + planName + planCmd := "./cockroach debug recover make-plan --confirm y --host " + addr + " -o " + planName if err = c.RunE(ctx, option.WithNodes(c.Node(controller)), planCmd); err != nil { t.L().Printf("failed to create plan, test can't proceed assuming unrecoverable cluster: %s", @@ -461,7 +461,7 @@ func runHalfOnlineRecoverLossOfQuorum( } t.L().Printf("staging recovery plan") - applyCommand := "./cockroach debug recover apply-plan --confirm y --insecure --host " + addr + " " + planName + applyCommand := "./cockroach debug recover apply-plan --confirm y --host " + addr + " " + planName c.Run(ctx, option.WithNodes(c.Nodes(controller)), applyCommand) // Ignore node failures because they could fail if recovered 
ranges @@ -477,7 +477,7 @@ func runHalfOnlineRecoverLossOfQuorum( } t.L().Printf("waiting for nodes to process recovery") - verifyCommand := "./cockroach debug recover verify --insecure --host " + addr + " " + planName + verifyCommand := "./cockroach debug recover verify --host " + addr + " " + planName if err = timeutil.RunWithTimeout(ctx, "wait-for-restart", 2*time.Minute, func(ctx context.Context) error { for { diff --git a/pkg/cmd/roachtest/tests/mixed_version_decommission.go b/pkg/cmd/roachtest/tests/mixed_version_decommission.go index 23b723546ef8..2f2b1b75acb9 100644 --- a/pkg/cmd/roachtest/tests/mixed_version_decommission.go +++ b/pkg/cmd/roachtest/tests/mixed_version_decommission.go @@ -18,7 +18,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" - "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil/clusterupgrade" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/roachprod/install" @@ -135,12 +134,8 @@ func preloadDataStep(target int) versionStep { // Load data into cluster to ensure we have a large enough number of replicas // to move on decommissioning. 
c := u.c - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Nodes(1)) - if err != nil { - t.Fatal(err) - } c.Run(ctx, option.WithNodes(c.Node(target)), - `./cockroach workload fixtures import tpcc --warehouses=100`, pgurl) + `./cockroach workload fixtures import tpcc --warehouses=100 {pgurl:1}`) db := c.Conn(ctx, t.L(), target) defer db.Close() if err := WaitFor3XReplication(ctx, t, t.L(), db); err != nil { @@ -156,7 +151,7 @@ func partialDecommissionStep(target, from int, binaryVersion *clusterupgrade.Ver return func(ctx context.Context, t test.Test, u *versionUpgradeTest) { c := u.c c.Run(ctx, option.WithNodes(c.Node(from)), clusterupgrade.CockroachPathForVersion(t, binaryVersion), "node", "decommission", - "--wait=none", "--insecure", strconv.Itoa(target), "--port", fmt.Sprintf("{pgport:%d}", from)) + "--wait=none", strconv.Itoa(target), "--port", fmt.Sprintf("{pgport:%d}", from), "--certs-dir=certs") } } @@ -167,7 +162,7 @@ func recommissionAllStep(from int, binaryVersion *clusterupgrade.Version) versio return func(ctx context.Context, t test.Test, u *versionUpgradeTest) { c := u.c c.Run(ctx, option.WithNodes(c.Node(from)), clusterupgrade.CockroachPathForVersion(t, binaryVersion), "node", "recommission", - "--insecure", c.All().NodeIDsString(), "--port", fmt.Sprintf("{pgport:%d}", from)) + c.All().NodeIDsString(), "--port", fmt.Sprintf("{pgport:%d}", from), "--certs-dir=certs") } } @@ -176,12 +171,8 @@ func recommissionAllStep(from int, binaryVersion *clusterupgrade.Version) versio func fullyDecommissionStep(target, from int, binaryVersion *clusterupgrade.Version) versionStep { return func(ctx context.Context, t test.Test, u *versionUpgradeTest) { c := u.c - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Node(from)) - if err != nil { - t.Fatal(err) - } c.Run(ctx, option.WithNodes(c.Node(from)), clusterupgrade.CockroachPathForVersion(t, binaryVersion), "node", "decommission", - "--wait=all", "--insecure", strconv.Itoa(target), 
fmt.Sprintf("--url=%s", pgurl)) + "--wait=all", strconv.Itoa(target), "--port={pgport:1}", "--certs-dir=certs") // If we are decommissioning a target node from the same node, the drain // step will be skipped. In this case, we should not consider the step done diff --git a/pkg/cmd/roachtest/tests/multitenant_distsql.go b/pkg/cmd/roachtest/tests/multitenant_distsql.go index 0cf83d22cf77..79af58b10014 100644 --- a/pkg/cmd/roachtest/tests/multitenant_distsql.go +++ b/pkg/cmd/roachtest/tests/multitenant_distsql.go @@ -24,6 +24,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" + "github.com/cockroachdb/cockroach/pkg/roachprod" "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/cockroachdb/cockroach/pkg/util/intsets" "github.com/stretchr/testify/require" @@ -61,7 +62,7 @@ func runMultiTenantDistSQL( // This test sets a smaller default range size than the default due to // performance and resource limitations. We set the minimum range max bytes to // 1 byte to bypass the guardrails. 
- settings := install.MakeClusterSettings(install.SecureOption(true)) + settings := install.MakeClusterSettings() settings.Env = append(settings.Env, "COCKROACH_MIN_RANGE_MAX_BYTES=1") c.Start(ctx, t.L(), option.DefaultStartOpts(), settings, c.Node(1)) c.Start(ctx, t.L(), option.DefaultStartOpts(), settings, c.Node(2)) @@ -198,7 +199,7 @@ func runMultiTenantDistSQL( if bundle { // Open bundle and verify its contents sqlConnCtx := clisqlclient.Context{} - pgURL, err := c.ExternalPGUrl(ctx, t.L(), c.Node(1), tenantName, 0) + pgURL, err := c.ExternalPGUrl(ctx, t.L(), c.Node(1), roachprod.PGURLOptions{VirtualClusterName: tenantName}) require.NoError(t, err) conn := sqlConnCtx.MakeSQLConn(io.Discard, io.Discard, pgURL[0]) bundles, err := clisqlclient.StmtDiagListBundles(ctx, conn) diff --git a/pkg/cmd/roachtest/tests/multitenant_shared_process.go b/pkg/cmd/roachtest/tests/multitenant_shared_process.go index acbc06904bd4..84aa1ed23ed9 100644 --- a/pkg/cmd/roachtest/tests/multitenant_shared_process.go +++ b/pkg/cmd/roachtest/tests/multitenant_shared_process.go @@ -44,7 +44,7 @@ func registerMultiTenantSharedProcess(r registry.Registry) { // In order to observe the app tenant's db console, create a secure // cluster and add Admin roles to the system and app tenant. 
- clusterSettings := install.MakeClusterSettings(install.SecureOption(true)) + clusterSettings := install.MakeClusterSettings() c.Start(ctx, t.L(), option.DefaultStartOpts(), clusterSettings, crdbNodes) startOpts := option.DefaultStartSharedVirtualClusterOpts(appTenantName) diff --git a/pkg/cmd/roachtest/tests/multitenant_tpch.go b/pkg/cmd/roachtest/tests/multitenant_tpch.go index 7cae4e7f988d..4c1fdc247afb 100644 --- a/pkg/cmd/roachtest/tests/multitenant_tpch.go +++ b/pkg/cmd/roachtest/tests/multitenant_tpch.go @@ -33,7 +33,7 @@ func runMultiTenantTPCH( secure := true c.Put(ctx, t.DeprecatedWorkload(), "./workload", c.Node(1)) start := func() { - c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings(install.SecureOption(secure)), c.All()) + c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings(), c.All()) } start() diff --git a/pkg/cmd/roachtest/tests/multitenant_upgrade.go b/pkg/cmd/roachtest/tests/multitenant_upgrade.go index d066dc46da34..802c2da8199a 100644 --- a/pkg/cmd/roachtest/tests/multitenant_upgrade.go +++ b/pkg/cmd/roachtest/tests/multitenant_upgrade.go @@ -94,7 +94,7 @@ func runMultiTenantUpgrade( kvNodes := c.Node(1) - settings := install.MakeClusterSettings(install.BinaryOption(predecessorBinary), install.SecureOption(true)) + settings := install.MakeClusterSettings(install.BinaryOption(predecessorBinary)) c.Start(ctx, t.L(), option.DefaultStartOpts(), settings, kvNodes) const tenant11aHTTPPort, tenant11aSQLPort = 8011, 20011 diff --git a/pkg/cmd/roachtest/tests/multitenant_utils.go b/pkg/cmd/roachtest/tests/multitenant_utils.go index 34154e70425c..526b3c4769eb 100644 --- a/pkg/cmd/roachtest/tests/multitenant_utils.go +++ b/pkg/cmd/roachtest/tests/multitenant_utils.go @@ -201,7 +201,7 @@ func (tn *tenantNode) start(ctx context.Context, t test.Test, c cluster.Cluster, extraArgs..., ) - externalUrls, err := c.ExternalPGUrl(ctx, t.L(), c.Node(tn.node), "" /* tenant */, 0 /* sqlInstance */) + 
externalUrls, err := c.ExternalPGUrl(ctx, t.L(), c.Node(tn.node), roachprod.PGURLOptions{}) require.NoError(t, err) u, err := url.Parse(externalUrls[0]) require.NoError(t, err) diff --git a/pkg/cmd/roachtest/tests/mvcc_gc.go b/pkg/cmd/roachtest/tests/mvcc_gc.go index 132e79d1b123..c9602c05710d 100644 --- a/pkg/cmd/roachtest/tests/mvcc_gc.go +++ b/pkg/cmd/roachtest/tests/mvcc_gc.go @@ -123,15 +123,11 @@ func runMVCCGC(ctx context.Context, t test.Test, c cluster.Cluster) { t.Fatalf("failed to up-replicate cluster: %s", err) } - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Nodes(1)) - if err != nil { - t.Fatal(err) - } m := c.NewMonitor(ctx) m.Go(func(ctx context.Context) error { cmd := roachtestutil.NewCommand("./cockroach workload init kv"). Flag("cycle-length", 20000). - Arg("%s", pgurl). + Arg("{pgurl:1}"). String() c.Run(ctx, option.WithNodes(c.Node(1)), cmd) @@ -642,7 +638,7 @@ func sendBatchRequest( } cmd := roachtestutil.NewCommand("./cockroach debug send-kv-batch"). Arg(requestFileName). - Option("insecure"). + Flag("certs-dir", "certs"). Flag("host", fmt.Sprintf("localhost:{pgport:%d}", node)). String() res, err := c.RunWithDetailsSingleNode( diff --git a/pkg/cmd/roachtest/tests/network.go b/pkg/cmd/roachtest/tests/network.go index 4862c01eaca1..733455251e9d 100644 --- a/pkg/cmd/roachtest/tests/network.go +++ b/pkg/cmd/roachtest/tests/network.go @@ -53,7 +53,7 @@ func runNetworkAuthentication(ctx context.Context, t test.Test, c cluster.Cluste // 3 will re-recreate a separate set of certs, which // we don't want. Starting all nodes at once ensures // that they use coherent certs. - settings := install.MakeClusterSettings(install.SecureOption(true)) + settings := install.MakeClusterSettings() // Don't create a backup schedule as this test shuts the cluster down immediately. 
c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), settings, serverNodes) @@ -326,7 +326,7 @@ func runClientNetworkConnectionTimeout(ctx context.Context, t test.Test, c clust } n := c.Spec().NodeCount serverNodes, clientNode := c.Range(1, n-1), c.Nodes(n) - settings := install.MakeClusterSettings(install.SecureOption(true)) + settings := install.MakeClusterSettings() c.Start(ctx, t.L(), option.DefaultStartOpts(), settings, serverNodes) certsDir := "/home/ubuntu/certs" t.L().Printf("connecting to cluster from roachtest...") diff --git a/pkg/cmd/roachtest/tests/network_logging.go b/pkg/cmd/roachtest/tests/network_logging.go index 2fa81225b5c6..5ad0a327045c 100644 --- a/pkg/cmd/roachtest/tests/network_logging.go +++ b/pkg/cmd/roachtest/tests/network_logging.go @@ -69,7 +69,7 @@ func registerNetworkLogging(r registry.Registry) { startOpts.RoachprodOpts.ExtraArgs = []string{ "--log", logCfg, } - c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings(install.SecureOption(true)), crdbNodes) + c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings(), crdbNodes) // Construct pgurls for the workload runner. As a roundabout way of detecting deadlocks, // we set a client timeout on the workload pgclient. 
If the server becomes unavailable diff --git a/pkg/cmd/roachtest/tests/nodejs_postgres.go b/pkg/cmd/roachtest/tests/nodejs_postgres.go index 68b03e754c8c..bcf1259f3dd5 100644 --- a/pkg/cmd/roachtest/tests/nodejs_postgres.go +++ b/pkg/cmd/roachtest/tests/nodejs_postgres.go @@ -42,7 +42,7 @@ func registerNodeJSPostgres(r registry.Registry) { } node := c.Node(1) t.Status("setting up cockroach") - settings := install.MakeClusterSettings(install.SecureOption(true)) + settings := install.MakeClusterSettings() err := c.StartE(ctx, t.L(), option.DefaultStartOptsInMemory(), settings) require.NoError(t, err) @@ -148,9 +148,9 @@ echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.co result, err := c.RunWithDetailsSingleNode(ctx, t.L(), option.WithNodes(node), fmt.Sprintf( `cd /mnt/data1/node-postgres/ && sudo \ -PGPORT={pgport:1} PGUSER=%s PGSSLMODE=require PGDATABASE=postgres_node_test \ -PGSSLCERT=$HOME/certs/client.%s.crt PGSSLKEY=$HOME/certs/client.%s.key PGSSLROOTCERT=$HOME/certs/ca.crt yarn test`, - user, user, user, +PGPORT={pgport:1} PGUSER=%[1]s PGSSLMODE=require PGDATABASE=postgres_node_test \ +PGSSLCERT=$HOME/certs/client.%[1]s.crt PGSSLKEY=$HOME/certs/client.%[1]s.key PGSSLROOTCERT=$HOME/certs/ca.crt yarn test`, + user, ), ) diff --git a/pkg/cmd/roachtest/tests/npgsql.go b/pkg/cmd/roachtest/tests/npgsql.go index c6e20c12cfda..4be8784109b8 100644 --- a/pkg/cmd/roachtest/tests/npgsql.go +++ b/pkg/cmd/roachtest/tests/npgsql.go @@ -42,7 +42,7 @@ func registerNpgsql(r registry.Registry) { } node := c.Node(1) t.Status("setting up cockroach") - c.Start(ctx, t.L(), option.DefaultStartOptsInMemory(), install.MakeClusterSettings(install.SecureOption(true)), c.All()) + c.Start(ctx, t.L(), option.DefaultStartOptsInMemory(), install.MakeClusterSettings(), c.All()) version, err := fetchCockroachVersion(ctx, t.L(), c, node[0]) if err != nil { diff --git a/pkg/cmd/roachtest/tests/pg_regress.go b/pkg/cmd/roachtest/tests/pg_regress.go index 
7e9fec6f1c90..dd205f423d81 100644 --- a/pkg/cmd/roachtest/tests/pg_regress.go +++ b/pkg/cmd/roachtest/tests/pg_regress.go @@ -22,6 +22,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" + "github.com/cockroachdb/cockroach/pkg/roachprod" rperrors "github.com/cockroachdb/cockroach/pkg/roachprod/errors" "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/cockroachdb/errors" @@ -112,7 +113,7 @@ func initPGRegress(ctx context.Context, t test.Test, c cluster.Cluster) { } } - urls, err := c.InternalPGUrl(ctx, t.L(), node, "" /* tenant */, 0 /* sqlInstance */) + urls, err := c.InternalPGUrl(ctx, t.L(), node, roachprod.PGURLOptions{}) if err != nil { t.Fatal(err) } @@ -149,7 +150,7 @@ func runPGRegress(ctx context.Context, t test.Test, c cluster.Cluster) { initPGRegress(ctx, t, c) node := c.Node(1) - urls, err := c.InternalPGUrl(ctx, t.L(), node, "" /* tenant */, 0 /* sqlInstance */) + urls, err := c.InternalPGUrl(ctx, t.L(), node, roachprod.PGURLOptions{}) if err != nil { t.Fatal(err) } diff --git a/pkg/cmd/roachtest/tests/pgjdbc.go b/pkg/cmd/roachtest/tests/pgjdbc.go index 9e63f12b4161..bd313ec09329 100644 --- a/pkg/cmd/roachtest/tests/pgjdbc.go +++ b/pkg/cmd/roachtest/tests/pgjdbc.go @@ -41,7 +41,7 @@ func registerPgjdbc(r registry.Registry) { } node := c.Node(1) t.Status("setting up cockroach") - c.Start(ctx, t.L(), option.DefaultStartOptsInMemory(), install.MakeClusterSettings(install.SecureOption(true)), c.All()) + c.Start(ctx, t.L(), option.DefaultStartOptsInMemory(), install.MakeClusterSettings(), c.All()) version, err := fetchCockroachVersion(ctx, t.L(), c, node[0]) if err != nil { diff --git a/pkg/cmd/roachtest/tests/pgx.go b/pkg/cmd/roachtest/tests/pgx.go index 920fb0c0a14c..f365d8c2a198 100644 --- a/pkg/cmd/roachtest/tests/pgx.go +++ b/pkg/cmd/roachtest/tests/pgx.go @@ -116,9 +116,9 @@ func registerPgx(r 
registry.Registry) { result, err := repeatRunWithDetailsSingleNode( ctx, c, t, node, "run pgx test suite", - "cd /mnt/data1/pgx && "+ - "PGX_TEST_DATABASE='postgresql://test_admin:@localhost:{pgport:1}/pgx_test' go test -v 2>&1 | "+ - "`go env GOPATH`/bin/go-junit-report", + fmt.Sprintf("cd /mnt/data1/pgx && "+ + "PGX_TEST_DATABASE='postgresql://%s:%s@localhost:{pgport:1}/pgx_test?sslmode=require' go test -v 2>&1 | "+ + "`go env GOPATH`/bin/go-junit-report", install.DefaultUser, install.DefaultPassword), ) // Fatal for a roachprod or SSH error. A roachprod error is when result.Err==nil. diff --git a/pkg/cmd/roachtest/tests/pop.go b/pkg/cmd/roachtest/tests/pop.go index 33abf3121d1d..8a118c4aba84 100644 --- a/pkg/cmd/roachtest/tests/pop.go +++ b/pkg/cmd/roachtest/tests/pop.go @@ -19,7 +19,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" - "github.com/cockroachdb/cockroach/pkg/roachprod/config" "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/stretchr/testify/require" ) @@ -35,7 +34,9 @@ func registerPop(r registry.Registry) { node := c.Node(1) t.Status("setting up cockroach") startOpts := option.DefaultStartOptsInMemory() - startOpts.RoachprodOpts.SQLPort = config.DefaultSQLPort + // pop expects secure clusters, indicated by cockroach_ssl, to have SQL Port 26259. + // See: https://github.com/gobuffalo/pop/blob/main/database.yml#L26-L28 + startOpts.RoachprodOpts.SQLPort = 26259 c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings(), c.All()) version, err := fetchCockroachVersion(ctx, t.L(), c, node[0]) if err != nil { @@ -81,22 +82,28 @@ func registerPop(r registry.Registry) { t.Status("building and setting up tests") + // pop expects to find certificates in a specific path. 
+ err = c.RunE(ctx, option.WithNodes(node), "mkdir -p /mnt/data1/pop/crdb/certs") + require.NoError(t, err) + err = c.RunE(ctx, option.WithNodes(node), "cp -r certs /mnt/data1/pop/crdb/") + require.NoError(t, err) + err = c.RunE(ctx, option.WithNodes(node), fmt.Sprintf(`cd %s && go build -v -tags sqlite -o tsoda ./soda`, popPath)) require.NoError(t, err) - err = c.RunE(ctx, option.WithNodes(node), fmt.Sprintf(`cd %s && ./tsoda drop -e cockroach -c ./database.yml -p ./testdata/migrations`, popPath)) + err = c.RunE(ctx, option.WithNodes(node), fmt.Sprintf(`cd %s && ./tsoda drop -e cockroach_ssl -c ./database.yml -p ./testdata/migrations`, popPath)) require.NoError(t, err) - err = c.RunE(ctx, option.WithNodes(node), fmt.Sprintf(`cd %s && ./tsoda create -e cockroach -c ./database.yml -p ./testdata/migrations`, popPath)) + err = c.RunE(ctx, option.WithNodes(node), fmt.Sprintf(`cd %s && ./tsoda create -e cockroach_ssl -c ./database.yml -p ./testdata/migrations`, popPath)) require.NoError(t, err) - err = c.RunE(ctx, option.WithNodes(node), fmt.Sprintf(`cd %s && ./tsoda migrate -e cockroach -c ./database.yml -p ./testdata/migrations`, popPath)) + err = c.RunE(ctx, option.WithNodes(node), fmt.Sprintf(`cd %s && ./tsoda migrate -e cockroach_ssl -c ./database.yml -p ./testdata/migrations`, popPath)) require.NoError(t, err) t.Status("running pop test suite") // No tests are expected to fail. - err = c.RunE(ctx, option.WithNodes(node), fmt.Sprintf(`cd %s && SODA_DIALECT=cockroach go test -race -tags sqlite -v ./... -count=1`, popPath)) + err = c.RunE(ctx, option.WithNodes(node), fmt.Sprintf(`cd %s && SODA_DIALECT=cockroach_ssl go test -race -tags sqlite -v ./... 
-count=1`, popPath)) require.NoError(t, err, "error while running pop tests") } diff --git a/pkg/cmd/roachtest/tests/psycopg.go b/pkg/cmd/roachtest/tests/psycopg.go index 9a4afbc7568f..5369f789d2f7 100644 --- a/pkg/cmd/roachtest/tests/psycopg.go +++ b/pkg/cmd/roachtest/tests/psycopg.go @@ -108,14 +108,15 @@ func registerPsycopg(r registry.Registry) { t.Status("running psycopg test suite") - result, err := c.RunWithDetailsSingleNode(ctx, t.L(), option.WithNodes(node), + result, err := c.RunWithDetailsSingleNode(ctx, t.L(), option.WithNodes(node), fmt.Sprintf( `cd /mnt/data1/psycopg/ && export PSYCOPG2_TESTDB=defaultdb && - export PSYCOPG2_TESTDB_USER=test_admin && + export PSYCOPG2_TESTDB_USER=%s && + export PSYCOPG2_TESTDB_PASSWORD=%s && export PSYCOPG2_TESTDB_PORT={pgport:1} && export PSYCOPG2_TESTDB_HOST=localhost && make check PYTHON_VERSION=3`, - ) + install.DefaultUser, install.DefaultPassword)) // Fatal for a roachprod or SSH error. A roachprod error is when result.Err==nil. // Proceed for any other (command) errors diff --git a/pkg/cmd/roachtest/tests/query_comparison_util.go b/pkg/cmd/roachtest/tests/query_comparison_util.go index 0e8a25893e37..627615ad9629 100644 --- a/pkg/cmd/roachtest/tests/query_comparison_util.go +++ b/pkg/cmd/roachtest/tests/query_comparison_util.go @@ -109,7 +109,7 @@ func runQueryComparison( return } c.Stop(clusterCtx, t.L(), option.DefaultStopOpts()) - c.Wipe(clusterCtx, false /* preserveCerts */) + c.Wipe(clusterCtx, true /* preserveCerts */) } } diff --git a/pkg/cmd/roachtest/tests/quit.go b/pkg/cmd/roachtest/tests/quit.go index b3913d84b1f2..ac7d97975917 100644 --- a/pkg/cmd/roachtest/tests/quit.go +++ b/pkg/cmd/roachtest/tests/quit.go @@ -14,6 +14,7 @@ import ( "context" "encoding/json" "fmt" + "io" "os" "path/filepath" "strings" @@ -251,21 +252,31 @@ func (q *quitTest) checkNoLeases(ctx context.Context, nodeID int) { q.t.L().Printf("retrieving ranges for node %d\n", i) // Get the report via HTTP. 
- // Flag -s is to remove progress on stderr, so that the buffer - // contains the JSON of the response and nothing else. adminAddrs, err := q.c.InternalAdminUIAddr(ctx, q.t.L(), q.c.Node(i)) if err != nil { q.Fatal(err) } - result, err := q.c.RunWithDetailsSingleNode(ctx, q.t.L(), option.WithNodes(q.c.Node(i)), - "curl", "-s", fmt.Sprintf("http://%s/_status/ranges/local", - adminAddrs[0])) + url := fmt.Sprintf("https://%s/_status/ranges/local", adminAddrs[0]) + client := roachtestutil.DefaultHTTPClient(q.c, q.t.L(), roachtestutil.HTTPTimeout(15*time.Second)) if err != nil { q.Fatal(err) } + var data []byte + func() { + response, err := client.Get(ctx, url) + if err != nil { + q.Fatal(err) + } + defer response.Body.Close() + data, err = io.ReadAll(response.Body) + if err != nil { + q.Fatal(err) + } + }() + // Persist the response to artifacts to aid debugging. See #75438. _ = os.WriteFile(filepath.Join(q.t.ArtifactsDir(), fmt.Sprintf("status_ranges_n%d.json", i)), - []byte(result.Stdout), 0644, + data, 0644, ) // We need just a subset of the response. Make an ad-hoc // struct with just the bits of interest. @@ -286,7 +297,7 @@ func (q *quitTest) checkNoLeases(ctx context.Context, nodeID int) { } `json:"ranges"` } var details jsonOutput - if err := json.Unmarshal([]byte(result.Stdout), &details); err != nil { + if err := json.Unmarshal(data, &details); err != nil { q.Fatal(err) } // Some sanity check. @@ -385,13 +396,10 @@ func registerQuitTransfersLeases(r registry.Registry) { // kill. If the drain is successful, the leases are transferred // successfully even if if the process terminates non-gracefully. 
registerTest("drain", "v20.1.0", func(ctx context.Context, t test.Test, c cluster.Cluster, nodeID int) { - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Node(nodeID)) - if err != nil { - t.Fatal(err) - } result, err := c.RunWithDetailsSingleNode(ctx, t.L(), option.WithNodes(c.Node(nodeID)), - "./cockroach", "node", "drain", "--insecure", "--logtostderr=INFO", - fmt.Sprintf("--url=%s", pgurl), + "./cockroach", "node", "drain", "--logtostderr=INFO", + fmt.Sprintf("--port={pgport:%d}", nodeID), + "--certs-dir certs", ) t.L().Printf("cockroach node drain:\n%s\n", result.Stdout+result.Stdout) if err != nil { @@ -432,13 +440,10 @@ func registerQuitTransfersLeases(r registry.Registry) { // - we add one to bring the value back between 1 and NodeCount // inclusive. otherNodeID := (nodeID % c.Spec().NodeCount) + 1 - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Node(otherNodeID)) - if err != nil { - t.Fatal(err) - } result, err := c.RunWithDetailsSingleNode(ctx, t.L(), option.WithNodes(c.Node(otherNodeID)), - "./cockroach", "node", "drain", "--insecure", "--logtostderr=INFO", - fmt.Sprintf("--url=%s", pgurl), + "./cockroach", "node", "drain", "--logtostderr=INFO", + fmt.Sprintf("--port={pgport:%d}", otherNodeID), + "--certs-dir certs", fmt.Sprintf("%d", nodeID), ) t.L().Printf("cockroach node drain:\n%s\n", result.Stdout+result.Stderr) diff --git a/pkg/cmd/roachtest/tests/rapid_restart.go b/pkg/cmd/roachtest/tests/rapid_restart.go index c3cfcdd5bb03..9214da6a5fa2 100644 --- a/pkg/cmd/roachtest/tests/rapid_restart.go +++ b/pkg/cmd/roachtest/tests/rapid_restart.go @@ -76,7 +76,7 @@ func runRapidRestart(ctx context.Context, t test.Test, c cluster.Cluster) { if err != nil { t.Fatal(err) } - base := `http://` + adminUIAddrs[0] + base := `https://` + adminUIAddrs[0] // Torture the prometheus endpoint to prevent regression of #19559. 
url := base + `/_status/vars` resp, err := httpClient.Get(ctx, url) diff --git a/pkg/cmd/roachtest/tests/rebalance_load.go b/pkg/cmd/roachtest/tests/rebalance_load.go index 9a434d9cf20d..61904ddd2b4a 100644 --- a/pkg/cmd/roachtest/tests/rebalance_load.go +++ b/pkg/cmd/roachtest/tests/rebalance_load.go @@ -95,14 +95,6 @@ func registerRebalanceLoad(r registry.Registry) { if mixedVersion { mvt := mixedversion.NewTest(ctx, t, t.L(), c, roachNodes, mixedversion.NeverUseFixtures, - // The http requests to the admin UI performed by the test don't play - // well with secure clusters. As of the time of writing, they return - // either of the following errors: - // tls: failed to verify certificate: x509: “node” certificate is not standards compliant - // tls: failed to verify certificate: x509: certificate signed by unknown authority - // - // Disable secure mode for simplicity. - mixedversion.ClusterSettingOption(install.SecureOption(false)), mixedversion.ClusterSettingOption(install.ClusterSettingsOption(settings.ClusterSettings)), ) mvt.InMixedVersion("rebalance load run", @@ -337,7 +329,7 @@ func makeStoreCPUFn( return func(ctx context.Context) ([]float64, error) { now := timeutil.Now() resp, err := getMetricsWithSamplePeriod( - ctx, url, startTime, now, statSamplePeriod, tsQueries) + ctx, c, t, url, startTime, now, statSamplePeriod, tsQueries) if err != nil { return nil, err } diff --git a/pkg/cmd/roachtest/tests/restart.go b/pkg/cmd/roachtest/tests/restart.go index a73edcf4bead..fc3c22ace06f 100644 --- a/pkg/cmd/roachtest/tests/restart.go +++ b/pkg/cmd/roachtest/tests/restart.go @@ -18,7 +18,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" - "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/roachprod/install" 
"github.com/cockroachdb/cockroach/pkg/util/timeutil" @@ -37,13 +36,8 @@ func runRestart(ctx context.Context, t test.Test, c cluster.Cluster, downDuratio // We don't really need tpcc, we just need a good amount of traffic and a good // amount of data. t.Status("importing tpcc fixture") - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Nodes(1)) - if err != nil { - t.Fatal(err) - } c.Run(ctx, option.WithNodes(workloadNode), - "./cockroach workload fixtures import tpcc --warehouses=100 --fks=false --checks=false", - pgurl, + "./cockroach workload fixtures import tpcc --warehouses=100 --fks=false --checks=false {pgurl:1}", ) // Wait a full scanner cycle (10m) for the raft log queue to truncate the @@ -90,11 +84,7 @@ func runRestart(ctx context.Context, t test.Test, c cluster.Cluster, downDuratio SELECT count(*) FROM tpcc.order_line; SET TRACING = OFF; SHOW TRACE FOR SESSION;` - pgurl, err = roachtestutil.DefaultPGUrl(ctx, c, t.L(), restartNode) - if err != nil { - t.Fatal(err) - } - c.Run(ctx, option.WithNodes(restartNode), fmt.Sprintf(`./cockroach sql --insecure --url=%s -e "%s"`, pgurl, tracedQ)) + c.Run(ctx, option.WithNodes(restartNode), fmt.Sprintf(`./cockroach sql --url={pgurl:1} -e "%s"`, tracedQ)) if took := timeutil.Since(start); took > downDuration { t.Fatalf(`expected to recover within %s took %s`, downDuration, took) } else { diff --git a/pkg/cmd/roachtest/tests/restore.go b/pkg/cmd/roachtest/tests/restore.go index 7688a0d7ac2c..0a95eed7a2f7 100644 --- a/pkg/cmd/roachtest/tests/restore.go +++ b/pkg/cmd/roachtest/tests/restore.go @@ -1050,7 +1050,7 @@ func verifyMetrics( if err != nil { return err } - url := "http://" + adminUIAddrs[0] + "/ts/query" + url := "https://" + adminUIAddrs[0] + "/ts/query" request := tspb.TimeSeriesQueryRequest{ // Ask for one minute intervals. 
We can't just ask for the whole hour diff --git a/pkg/cmd/roachtest/tests/roachmart.go b/pkg/cmd/roachtest/tests/roachmart.go index c9d4605358df..ef005d8540cc 100644 --- a/pkg/cmd/roachtest/tests/roachmart.go +++ b/pkg/cmd/roachtest/tests/roachmart.go @@ -17,7 +17,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" - "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/roachprod/install" @@ -51,12 +50,9 @@ func registerRoachmart(r registry.Registry) { } } t.Status("initializing workload") - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Nodes(1)) - if err != nil { - t.Fatal(err) - } + // See https://github.com/cockroachdb/cockroach/issues/94062 for the --data-loader. - roachmartRun(ctx, 0, "./workload", "init", "roachmart", "--data-loader=INSERT", pgurl) + roachmartRun(ctx, 0, "./workload", "init", "roachmart", "--data-loader=INSERT", "{pgurl:1}") duration := " --duration=" + ifLocal(c, "10s", "10m") diff --git a/pkg/cmd/roachtest/tests/ruby_pg.go b/pkg/cmd/roachtest/tests/ruby_pg.go index 413a893336f8..9231dfcbf4cf 100644 --- a/pkg/cmd/roachtest/tests/ruby_pg.go +++ b/pkg/cmd/roachtest/tests/ruby_pg.go @@ -52,7 +52,9 @@ func registerRubyPG(r registry.Registry) { t.Status("setting up cockroach") startOpts := option.DefaultStartOptsInMemory() startOpts.RoachprodOpts.SQLPort = config.DefaultSQLPort - c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings(), c.All()) + // TODO(darrylwong): ruby-pg is currently being updated to run on Ubuntu 22.04. + // Once complete, fix up ruby_pg_helpers to accept a tls connection. 
+ c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings(install.SecureOption(false)), c.All()) version, err := fetchCockroachVersion(ctx, t.L(), c, node[0]) if err != nil { diff --git a/pkg/cmd/roachtest/tests/rust_postgres.go b/pkg/cmd/roachtest/tests/rust_postgres.go index 024f4c1bd3fa..6a778a50631e 100644 --- a/pkg/cmd/roachtest/tests/rust_postgres.go +++ b/pkg/cmd/roachtest/tests/rust_postgres.go @@ -35,7 +35,11 @@ func registerRustPostgres(r registry.Registry) { // We hardcode port 5433 since that's the port rust-postgres expects. startOpts := option.DefaultStartOptsInMemory() startOpts.RoachprodOpts.SQLPort = 5433 - c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings(), c.All()) + // rust-postgres currently doesn't support changing the config through + // the environment, which means we can't pass it ssl connection details + // and must run the cluster in insecure mode. + // See: https://github.com/sfackler/rust-postgres/issues/654 + c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings(install.SecureOption(false)), c.All()) db := c.Conn(ctx, t.L(), 1) _, err := db.Exec("create user postgres with createdb createlogin createrole cancelquery") if err != nil { diff --git a/pkg/cmd/roachtest/tests/schemachange.go b/pkg/cmd/roachtest/tests/schemachange.go index a88a0e77e332..9e284ff51082 100644 --- a/pkg/cmd/roachtest/tests/schemachange.go +++ b/pkg/cmd/roachtest/tests/schemachange.go @@ -19,7 +19,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" - "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/roachprod/install" @@ -59,11 +58,7 @@ func registerSchemaChangeDuringKV(r registry.Registry) { }) m.Wait() - pgurl, err := 
roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Nodes(1)) - if err != nil { - t.Fatal(err) - } - c.Run(ctx, option.WithNodes(c.Node(1)), `./workload init kv --drop --db=test`, pgurl) + c.Run(ctx, option.WithNodes(c.Node(1)), `./workload init kv --drop --db=test {pgurl:1}`) for node := 1; node <= c.Spec().NodeCount; node++ { node := node // TODO(dan): Ideally, the test would fail if this queryload failed, diff --git a/pkg/cmd/roachtest/tests/schemachange_random_load.go b/pkg/cmd/roachtest/tests/schemachange_random_load.go index 4924984175c1..1303770204d3 100644 --- a/pkg/cmd/roachtest/tests/schemachange_random_load.go +++ b/pkg/cmd/roachtest/tests/schemachange_random_load.go @@ -19,7 +19,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" - "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/roachprod/install" @@ -98,11 +97,8 @@ func runSchemaChangeRandomLoad( t.Status("starting cockroach nodes") c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings(), roachNodes) - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Nodes(1)) - if err != nil { - t.Fatal(err) - } - c.Run(ctx, option.WithNodes(loadNode), fmt.Sprintf("./workload init schemachange '%s'", pgurl)) + + c.Run(ctx, option.WithNodes(loadNode), "./workload init schemachange {pgurl:1}") result, err := c.RunWithDetailsSingleNode(ctx, t.L(), option.WithNodes(c.Node(1)), "echo", "-n", "{store-dir}") if err != nil { diff --git a/pkg/cmd/roachtest/tests/slow_drain.go b/pkg/cmd/roachtest/tests/slow_drain.go index 625c6c1fefcc..a0829bd22a25 100644 --- a/pkg/cmd/roachtest/tests/slow_drain.go +++ b/pkg/cmd/roachtest/tests/slow_drain.go @@ -20,7 +20,6 @@ import ( 
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" - "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/cockroachdb/cockroach/pkg/testutils" @@ -119,13 +118,9 @@ func runSlowDrain(ctx context.Context, t test.Test, c cluster.Cluster, duration m.Go(func(ctx context.Context) error { drain := func(id int) error { t.Status(fmt.Sprintf("draining node %d", id)) - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Node(id)) - if err != nil { - t.Fatal(err) - } return c.RunE(ctx, option.WithNodes(c.Node(id)), - fmt.Sprintf("./cockroach node drain %d --insecure --drain-wait=%s --url=%s", id, duration.String(), pgurl), + fmt.Sprintf("./cockroach node drain %d --drain-wait=%s --certs-dir=certs --port={pgport:%d}", id, duration.String(), id), ) } return drain(id) diff --git a/pkg/cmd/roachtest/tests/smoketest_secure.go b/pkg/cmd/roachtest/tests/smoketest_secure.go index 9dc17feca690..7b1c051245b8 100644 --- a/pkg/cmd/roachtest/tests/smoketest_secure.go +++ b/pkg/cmd/roachtest/tests/smoketest_secure.go @@ -34,7 +34,7 @@ func registerSecure(r registry.Registry) { Cluster: r.MakeClusterSpec(numNodes), Leases: registry.MetamorphicLeases, Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { - settings := install.MakeClusterSettings(install.SecureOption(true)) + settings := install.MakeClusterSettings() c.Start(ctx, t.L(), option.DefaultStartOpts(), settings) db := c.Conn(ctx, t.L(), 1) defer db.Close() @@ -57,7 +57,7 @@ func registerSecure(r registry.Registry) { // multitenantSmokeTest verifies that a secure sql pod can connect to kv server // and that tenant is is properly transmitted via cert. 
func multitenantSmokeTest(ctx context.Context, t test.Test, c cluster.Cluster) { - settings := install.MakeClusterSettings(install.SecureOption(true)) + settings := install.MakeClusterSettings() c.Start(ctx, t.L(), option.DefaultStartOpts(), settings, c.Node(1)) // make sure connections to kvserver work diff --git a/pkg/cmd/roachtest/tests/sqlalchemy.go b/pkg/cmd/roachtest/tests/sqlalchemy.go index b92ba5b63b20..56c9538d6b1d 100644 --- a/pkg/cmd/roachtest/tests/sqlalchemy.go +++ b/pkg/cmd/roachtest/tests/sqlalchemy.go @@ -143,10 +143,10 @@ func runSQLAlchemy(ctx context.Context, t test.Test, c cluster.Cluster) { // Note that this is expected to return an error, since the test suite // will fail. And it is safe to swallow it here. result, err := c.RunWithDetailsSingleNode(ctx, t.L(), option.WithNodes(node), - `source venv/bin/activate && cd /mnt/data1/sqlalchemy-cockroachdb/ && pytest --maxfail=0 \ - --dburi='cockroachdb://test_admin@localhost:{pgport:1}/defaultdb?sslmode=disable&disable_cockroachdb_telemetry=true' \ + fmt.Sprintf(`source venv/bin/activate && cd /mnt/data1/sqlalchemy-cockroachdb/ && pytest --maxfail=0 \ + --dburi='cockroachdb://%s:%s@localhost:{pgport:1}/defaultdb?sslmode=require&disable_cockroachdb_telemetry=true' \ test/test_suite_sqlalchemy.py - `) + `, install.DefaultUser, install.DefaultPassword)) // Fatal for a roachprod or SSH error. A roachprod error is when result.Err==nil. 
// Proceed for any other (command) errors diff --git a/pkg/cmd/roachtest/tests/status_server.go b/pkg/cmd/roachtest/tests/status_server.go index c513848e371d..fdf16edf2372 100644 --- a/pkg/cmd/roachtest/tests/status_server.go +++ b/pkg/cmd/roachtest/tests/status_server.go @@ -19,16 +19,19 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/cockroachdb/cockroach/pkg/server/serverpb" - "github.com/cockroachdb/cockroach/pkg/util/httputil" "github.com/cockroachdb/cockroach/pkg/util/retry" ) func runStatusServer(ctx context.Context, t test.Test, c cluster.Cluster) { c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings()) + // The status endpoints below may take a while to produce their answer, maybe more + // than the 3 second timeout of the default http client. + client := roachtestutil.DefaultHTTPClient(c, t.L(), roachtestutil.HTTPTimeout(15*time.Second)) // Get the ids for each node. idMap := make(map[int]roachpb.NodeID) @@ -39,39 +42,34 @@ func runStatusServer(ctx context.Context, t test.Test, c cluster.Cluster) { } for i, addr := range adminUIAddrs { var details serverpb.DetailsResponse - url := `http://` + addr + `/_status/details/local` + url := `https://` + addr + `/_status/details/local` // Use a retry-loop when populating the maps because we might be trying to // talk to the servers before they are responding to status requests // (resulting in 404's). 
if err := retry.ForDuration(10*time.Second, func() error { - return httputil.GetJSON(http.Client{}, url, &details) + return client.GetJSON(ctx, url, &details) }); err != nil { t.Fatal(err) } idMap[i+1] = details.NodeID - urlMap[i+1] = `http://` + addr + urlMap[i+1] = `https://` + addr } - // The status endpoints below may take a while to produce their answer, maybe more - // than the 3 second timeout of the default http client. - httpClient := httputil.NewClientWithTimeout(15 * time.Second) - // get performs an HTTP GET to the specified path for a specific node. - get := func(base, rel string) []byte { - url := base + rel - resp, err := httpClient.Get(context.TODO(), url) + get := func(path string, httpClient *roachtestutil.RoachtestHTTPClient) []byte { + resp, err := httpClient.Get(ctx, path) if err != nil { - t.Fatalf("could not GET %s - %s", url, err) + t.Fatalf("could not GET %s - %s", path, err) } defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { - t.Fatalf("could not read body for %s - %s", url, err) + t.Fatalf("could not read body for %s - %s", path, err) } if resp.StatusCode != http.StatusOK { - t.Fatalf("could not GET %s - statuscode: %d - body: %s", url, resp.StatusCode, body) + t.Fatalf("could not GET %s - statuscode: %d - body: %s", path, resp.StatusCode, body) } - t.L().Printf("OK response from %s\n", url) + t.L().Printf("OK response from %s\n", path) return body } @@ -85,29 +83,32 @@ func runStatusServer(ctx context.Context, t test.Test, c cluster.Cluster) { } var details serverpb.DetailsResponse for _, urlID := range urlIDs { - if err := httputil.GetJSON(http.Client{}, url+`/_status/details/`+urlID, &details); err != nil { + if err := client.GetJSON(ctx, url+`/_status/details/`+urlID, &details); err != nil { t.Fatalf("unable to parse details - %s", err) } if details.NodeID != expectedNodeID { t.Fatalf("%d calling %s: node ids don't match - expected %d, actual %d", nodeID, urlID, expectedNodeID, details.NodeID) } - - get(url, 
fmt.Sprintf("/_status/gossip/%s", urlID)) - get(url, fmt.Sprintf("/_status/nodes/%s", urlID)) - get(url, fmt.Sprintf("/_status/logfiles/%s", urlID)) - get(url, fmt.Sprintf("/_status/logs/%s", urlID)) - get(url, fmt.Sprintf("/_status/stacks/%s", urlID)) + endpoints := []string{ + fmt.Sprintf("%s/_status/gossip/%s", url, urlID), + fmt.Sprintf("%s/_status/nodes/%s", url, urlID), + fmt.Sprintf("%s/_status/logfiles/%s", url, urlID), + fmt.Sprintf("%s/_status/logs/%s", url, urlID), + fmt.Sprintf("%s/_status/stacks/%s", url, urlID), + } + for _, endpoint := range endpoints { + get(endpoint, client) + } } - - get(url, "/_status/vars") + get(url+"/_status/vars", client) } // Check local response for the every node. for i := 1; i <= c.Spec().NodeCount; i++ { id := idMap[i] checkNode(urlMap[i], id, id, id) - get(urlMap[i], "/_status/nodes") + get(urlMap[i]+"/_status/nodes", client) } // Proxy from the first node to the last node. diff --git a/pkg/cmd/roachtest/tests/sysbench.go b/pkg/cmd/roachtest/tests/sysbench.go index b44cfa38d376..b53a5d76280a 100644 --- a/pkg/cmd/roachtest/tests/sysbench.go +++ b/pkg/cmd/roachtest/tests/sysbench.go @@ -75,8 +75,8 @@ func (o *sysbenchOptions) cmd(haproxy bool) string { --db-driver=pgsql \ --pgsql-host=%s \ --pgsql-port=%s \ - --pgsql-user=root \ - --pgsql-password= \ + --pgsql-user=%s \ + --pgsql-password=%s \ --pgsql-db=sysbench \ --report-interval=1 \ --time=%d \ @@ -87,6 +87,8 @@ func (o *sysbenchOptions) cmd(haproxy bool) string { %s`, pghost, pgport, + install.DefaultUser, + install.DefaultPassword, int(o.duration.Seconds()), o.concurrency, o.tables, @@ -109,7 +111,12 @@ func runSysbench(ctx context.Context, t test.Test, c cluster.Cluster, opts sysbe if err = c.Install(ctx, t.L(), loadNode, "haproxy"); err != nil { t.Fatal(err) } - c.Run(ctx, option.WithNodes(loadNode), "./cockroach gen haproxy --insecure --url {pgurl:1}") + // cockroach gen haproxy does not support specifying a non root user + pgurl, err := 
roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Node(1), install.AuthRootCert) + if err != nil { + t.Fatal(err) + } + c.Run(ctx, option.WithNodes(loadNode), fmt.Sprintf("./cockroach gen haproxy --url %s", pgurl)) c.Run(ctx, option.WithNodes(loadNode), "haproxy -f haproxy.cfg -D") t.Status("installing sysbench") @@ -120,11 +127,11 @@ func runSysbench(ctx context.Context, t test.Test, c cluster.Cluster, opts sysbe m := c.NewMonitor(ctx, roachNodes) m.Go(func(ctx context.Context) error { t.Status("preparing workload") - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Node(1)) + pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Node(1), install.AuthRootCert) if err != nil { t.Fatal(err) } - c.Run(ctx, option.WithNodes(c.Node(1)), fmt.Sprintf(`./cockroach sql --insecure --url=%s -e "CREATE DATABASE sysbench"`, pgurl)) + c.Run(ctx, option.WithNodes(c.Node(1)), fmt.Sprintf(`./cockroach sql --url=%s -e "CREATE DATABASE sysbench"`, pgurl)) c.Run(ctx, option.WithNodes(loadNode), opts.cmd(false /* haproxy */)+" prepare") t.Status("running workload") diff --git a/pkg/cmd/roachtest/tests/tlp.go b/pkg/cmd/roachtest/tests/tlp.go index 7a331ce300a2..8a21da6fb430 100644 --- a/pkg/cmd/roachtest/tests/tlp.go +++ b/pkg/cmd/roachtest/tests/tlp.go @@ -80,7 +80,7 @@ func runTLP(ctx context.Context, t test.Test, c cluster.Cluster) { return } c.Stop(ctx, t.L(), option.DefaultStopOpts()) - c.Wipe(ctx, false /* preserveCerts */) + c.Wipe(ctx, true /* preserveCerts */) } } diff --git a/pkg/cmd/roachtest/tests/tpcc.go b/pkg/cmd/roachtest/tests/tpcc.go index 2a6093df01c4..b11ea3883482 100644 --- a/pkg/cmd/roachtest/tests/tpcc.go +++ b/pkg/cmd/roachtest/tests/tpcc.go @@ -195,15 +195,8 @@ func setupTPCC( case usingExistingData: // Do nothing. 
case usingImport: - l.Printf("loading fixture" + estimatedSetupTimeStr) - pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, l, c.Nodes(1)) - if err != nil { - t.Fatal(err) - } - c.Run(ctx, - option.WithNodes(crdbNodes[:1]), - tpccImportCmd(opts.DB, opts.Warehouses, opts.ExtraSetupArgs, pgurl), - ) + t.Status("loading fixture" + estimatedSetupTimeStr) + c.Run(ctx, option.WithNodes(crdbNodes[:1]), tpccImportCmd(opts.DB, opts.Warehouses, opts.ExtraSetupArgs, "{pgurl:1}")) case usingInit: l.Printf("initializing tables" + estimatedSetupTimeStr) extraArgs := opts.ExtraSetupArgs @@ -1391,10 +1384,7 @@ func loadTPCCBench( if b.SharedProcessMT { pgurl = fmt.Sprintf("{pgurl%s:%s}", roachNodes[:1], appTenantName) } else { - pgurl, err = roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Nodes(1)) - if err != nil { - t.Fatal(err) - } + pgurl = "{pgurl:1}" } cmd := tpccImportCmd("", loadWarehouses, loadArgs, pgurl) if err = c.RunE(ctx, option.WithNodes(roachNodes[:1]), cmd); err != nil { @@ -1483,7 +1473,12 @@ func runTPCCBench(ctx context.Context, t test.Test, c cluster.Cluster, b tpccBen if err := c.Install(ctx, t.L(), loadNodes, "haproxy"); err != nil { t.Fatal(err) } - c.Run(ctx, option.WithNodes(loadNodes), "./cockroach gen haproxy --insecure --url {pgurl:1}") + // cockroach gen haproxy does not support specifying a non root user + pgurl, err := roachtestutil.DefaultPGUrl(ctx, c, t.L(), c.Node(1), install.AuthRootCert) + if err != nil { + t.Fatal(err) + } + c.Run(ctx, option.WithNodes(loadNodes), fmt.Sprintf("./cockroach gen haproxy --url %s", pgurl)) // Increase the maximum connection limit to ensure that no TPC-C // load gen workers get stuck during connection initialization. 
// 10k warehouses requires at least 20,000 connections, so add a diff --git a/pkg/cmd/roachtest/tests/tpcdsvec.go b/pkg/cmd/roachtest/tests/tpcdsvec.go index d0f2b96f1201..08004f5c95c8 100644 --- a/pkg/cmd/roachtest/tests/tpcdsvec.go +++ b/pkg/cmd/roachtest/tests/tpcdsvec.go @@ -21,6 +21,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" + "github.com/cockroachdb/cockroach/pkg/roachprod" "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/cockroach/pkg/workload/tpcds" @@ -94,7 +95,7 @@ WITH unsafe_restore_incompatible_version; // We additionally open fresh connections for each query. setStmtTimeout := fmt.Sprintf("SET statement_timeout='%s';", timeout) firstNode := c.Node(1) - urls, err := c.ExternalPGUrl(ctx, t.L(), firstNode, "" /* tenant */, 0 /* sqlInstance */) + urls, err := c.ExternalPGUrl(ctx, t.L(), firstNode, roachprod.PGURLOptions{}) if err != nil { t.Fatal(err) } diff --git a/pkg/cmd/roachtest/tests/tpce.go b/pkg/cmd/roachtest/tests/tpce.go index 85e70bec2bcb..cbff7f36df1a 100644 --- a/pkg/cmd/roachtest/tests/tpce.go +++ b/pkg/cmd/roachtest/tests/tpce.go @@ -54,8 +54,8 @@ type tpceConnectionOpts struct { const ( defaultFixtureBucket = "gs://cockroach-fixtures-us-east1/tpce-csv" - defaultUser = "root" - defaultPassword = "" + defaultUser = install.DefaultUser + defaultPassword = install.DefaultPassword ) func defaultTPCEConnectionOpts() tpceConnectionOpts { diff --git a/pkg/cmd/roachtest/tests/tpchvec.go b/pkg/cmd/roachtest/tests/tpchvec.go index 5d160fa7591d..2f2976ada5f7 100644 --- a/pkg/cmd/roachtest/tests/tpchvec.go +++ b/pkg/cmd/roachtest/tests/tpchvec.go @@ -567,17 +567,25 @@ func smithcmpTestRun( ) { runConfig := tc.getRunConfig() smithcmpPreTestRunHook(ctx, t, c, conn, runConfig.clusterSetups[0]) - const ( - configFile = 
`tpchvec_smithcmp.toml` - configURL = `https://raw.githubusercontent.com/cockroachdb/cockroach/master/pkg/cmd/roachtest/tests/` + configFile - ) + const configFile = `tpchvec_smithcmp.toml` + firstNode := c.Node(1) - if err := c.RunE(ctx, option.WithNodes(firstNode), fmt.Sprintf("curl %s > %s", configURL, configFile)); err != nil { + if err := c.PutE(ctx, t.L(), "./pkg/cmd/roachtest/tests/"+configFile, configFile); err != nil { + t.Fatal(err) + } + port, err := c.SQLPorts(ctx, t.L(), c.Node(1), "", 0) + if err != nil { t.Fatal(err) } - // smithcmp cannot access the pgport env variable, so we must edit the config file here - // to tell it the port to use. - if err := c.RunE(ctx, option.WithNodes(firstNode), fmt.Sprintf(`port=$(echo -n {pgport:1}) && sed -i "s|26257|$port|g" %s`, configFile)); err != nil { + // We don't know the pgurl ahead of time, so we must edit it into the config file. + // We create the URL manually, opposed to using DefaultPgUrl(), as we need to: + // Escape & as it's a special character in sed. + // Use postgresql instead of postgres. + // Use tpch and localhost. 
+ pgurl := fmt.Sprintf( + "postgresql://%[1]s:%[2]s@localhost:%[3]d/tpch?sslcert=certs%%2Fclient.%[1]s.crt\\&sslkey=certs%%2Fclient.%[1]s.key\\&sslmode=verify-full\\&sslrootcert=certs%%2Fca.crt", + install.DefaultUser, install.DefaultPassword, port[0]) + if err := c.RunE(ctx, option.WithNodes(firstNode), fmt.Sprintf(`sed -i "s|PG_Connection_String|%s|g" %s`, pgurl, configFile)); err != nil { t.Fatal(err) } cmd := fmt.Sprintf("./%s %s", tpchVecSmithcmp, configFile) @@ -605,7 +613,7 @@ func runTPCHVec( if _, err := singleTenantConn.Exec("SET CLUSTER SETTING kv.range_merge.queue_enabled = false;"); err != nil { t.Fatal(err) } - conn = createInMemoryTenantWithConn(ctx, t, c, appTenantName, c.All(), false /* secure */) + conn = createInMemoryTenantWithConn(ctx, t, c, appTenantName, c.All(), c.IsSecure() /* secure */) } else { conn = c.Conn(ctx, t.L(), 1) disableMergeQueue = true @@ -613,7 +621,7 @@ func runTPCHVec( t.Status("restoring TPCH dataset for Scale Factor 1") if err := loadTPCHDataset( - ctx, t, c, conn, 1 /* sf */, c.NewMonitor(ctx), c.All(), disableMergeQueue, false, /* secure */ + ctx, t, c, conn, 1 /* sf */, c.NewMonitor(ctx), c.All(), disableMergeQueue, c.IsSecure(), /* secure */ ); err != nil { t.Fatal(err) } diff --git a/pkg/cmd/roachtest/tests/tpchvec_smithcmp.toml b/pkg/cmd/roachtest/tests/tpchvec_smithcmp.toml index 951103a22d53..f25df1083b49 100644 --- a/pkg/cmd/roachtest/tests/tpchvec_smithcmp.toml +++ b/pkg/cmd/roachtest/tests/tpchvec_smithcmp.toml @@ -504,15 +504,18 @@ ORDER BY # thus be executing the same query each time. Queries that don't change # should be tested in other places; smithcmp is for random testing. +# We use a placeholder for addr so we can easily +# find and replace it with a valid connection string. 
+ [databases.vec-off] -addr = "postgresql://root@localhost:26257/tpch?sslmode=disable" +addr = "PG_Connection_String" allowmutations = true initsql = """ set vectorize=off; """ [databases.vec-on] -addr = "postgresql://root@localhost:26257/tpch?sslmode=disable" +addr = "PG_Connection_String" allowmutations = true initsql = """ set vectorize=on; diff --git a/pkg/cmd/roachtest/tests/ts_util.go b/pkg/cmd/roachtest/tests/ts_util.go index 3330318b6643..e922c7247621 100644 --- a/pkg/cmd/roachtest/tests/ts_util.go +++ b/pkg/cmd/roachtest/tests/ts_util.go @@ -12,14 +12,13 @@ package tests import ( "context" - "net/http" "time" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/ts/tspb" - "github.com/cockroachdb/cockroach/pkg/util/httputil" ) // tsQueryType represents the type of the time series query to retrieve. 
In @@ -48,9 +47,14 @@ type tsQuery struct { } func mustGetMetrics( - ctx context.Context, t test.Test, adminURL string, start, end time.Time, tsQueries []tsQuery, + ctx context.Context, + c cluster.Cluster, + t test.Test, + adminURL string, + start, end time.Time, + tsQueries []tsQuery, ) tspb.TimeSeriesQueryResponse { - response, err := getMetrics(ctx, adminURL, start, end, tsQueries) + response, err := getMetrics(ctx, c, t, adminURL, start, end, tsQueries) if err != nil { t.Fatal(err) } @@ -58,13 +62,20 @@ func mustGetMetrics( } func getMetrics( - ctx context.Context, adminURL string, start, end time.Time, tsQueries []tsQuery, + ctx context.Context, + c cluster.Cluster, + t test.Test, + adminURL string, + start, end time.Time, + tsQueries []tsQuery, ) (tspb.TimeSeriesQueryResponse, error) { - return getMetricsWithSamplePeriod(ctx, adminURL, start, end, defaultSamplePeriod, tsQueries) + return getMetricsWithSamplePeriod(ctx, c, t, adminURL, start, end, defaultSamplePeriod, tsQueries) } func getMetricsWithSamplePeriod( ctx context.Context, + c cluster.Cluster, + t test.Test, adminURL string, start, end time.Time, samplePeriod time.Duration, @@ -103,7 +114,8 @@ func getMetricsWithSamplePeriod( Queries: queries, } var response tspb.TimeSeriesQueryResponse - err := httputil.PostProtobuf(ctx, http.Client{Timeout: 500 * time.Millisecond}, url, &request, &response) + client := roachtestutil.DefaultHTTPClient(c, t.L(), roachtestutil.HTTPTimeout(500*time.Millisecond)) + err := client.PostProtobuf(ctx, url, &request, &response) return response, err } @@ -122,7 +134,7 @@ func verifyTxnPerSecond( t.Fatal(err) } adminURL := adminUIAddrs[0] - response := mustGetMetrics(ctx, t, adminURL, start, end, []tsQuery{ + response := mustGetMetrics(ctx, c, t, adminURL, start, end, []tsQuery{ {name: "cr.node.txn.commits", queryType: rate}, {name: "cr.node.txn.commits", queryType: total}, }) @@ -173,7 +185,7 @@ func verifyLookupsPerSec( t.Fatal(err) } adminURL := adminUIAddrs[0] - response 
:= mustGetMetrics(ctx, t, adminURL, start, end, []tsQuery{ + response := mustGetMetrics(ctx, c, t, adminURL, start, end, []tsQuery{ {name: "cr.node.distsender.rangelookups", queryType: rate}, }) diff --git a/pkg/cmd/roachtest/tests/typeorm.go b/pkg/cmd/roachtest/tests/typeorm.go index 6f37e255e3db..4cacbd2b0606 100644 --- a/pkg/cmd/roachtest/tests/typeorm.go +++ b/pkg/cmd/roachtest/tests/typeorm.go @@ -176,9 +176,11 @@ echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.co t.Fatal(err) } - t.Status("running TypeORM test suite - approx 12 mins") + // We have to pass in the root cert with NODE_EXTRA_CA_CERTS because the JSON + // config only accepts the actual certificate contents and not a path. + t.Status("running TypeORM test suite - approx 2 mins") result, err := c.RunWithDetailsSingleNode(ctx, t.L(), option.WithNodes(node), - `cd /mnt/data1/typeorm/ && npm test`, + `cd /mnt/data1/typeorm/ && NODE_EXTRA_CA_CERTS=$HOME/certs/ca.crt npm test`, ) rawResults := result.Stdout + result.Stderr t.L().Printf("Test Results: %s", rawResults) @@ -226,7 +228,7 @@ echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.co // This full config is required, but notice that all the non-cockroach databases // are set to skip. Some of the unit tests look for a specific config, like // sqlite and will fail if it is not present. 
-const typeORMConfigJSON = ` +var typeORMConfigJSON = fmt.Sprintf(` [ { "skip": true, @@ -295,9 +297,15 @@ const typeORMConfigJSON = ` "type": "cockroachdb", "host": "localhost", "port": {pgport:1}, - "username": "test_admin", - "password": "", - "database": "defaultdb" + "username": "%s", + "password": "%s", + "database": "defaultdb", + "ssl": true, + "extra": { + "ssl": { + "rejectUnauthorized": true + } + } }, { "skip": true, @@ -309,4 +317,4 @@ const typeORMConfigJSON = ` "useNewUrlParser": true } ] -` +`, install.DefaultUser, install.DefaultPassword) diff --git a/pkg/cmd/roachtest/tests/util.go b/pkg/cmd/roachtest/tests/util.go index a4105c836405..85c5b9102978 100644 --- a/pkg/cmd/roachtest/tests/util.go +++ b/pkg/cmd/roachtest/tests/util.go @@ -20,9 +20,9 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/roachprod/logger" - "github.com/cockroachdb/cockroach/pkg/util/httputil" "github.com/cockroachdb/cockroach/pkg/util/retry" "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/errors" @@ -41,8 +41,9 @@ func WaitFor3XReplication(ctx context.Context, t test.Test, l *logger.Logger, db func WaitForReady( ctx context.Context, t test.Test, c cluster.Cluster, nodes option.NodeListOption, ) { + client := roachtestutil.DefaultHTTPClient(c, t.L()) checkReady := func(ctx context.Context, url string) error { - resp, err := httputil.Get(ctx, url) + resp, err := client.Get(ctx, url) if err != nil { return err } @@ -63,7 +64,7 @@ func WaitForReady( require.NoError(t, timeutil.RunWithTimeout( ctx, "waiting for ready", time.Minute, func(ctx context.Context) error { for i, adminAddr := range adminAddrs { - url := fmt.Sprintf(`http://%s/health?ready=1`, adminAddr) + url := 
fmt.Sprintf(`https://%s/health?ready=1`, adminAddr) for err := checkReady(ctx, url); err != nil; err = checkReady(ctx, url) { t.L().Printf("n%d not ready, retrying: %s", nodes[i], err) diff --git a/pkg/cmd/roachtest/tests/validate_system_schema_after_version_upgrade.go b/pkg/cmd/roachtest/tests/validate_system_schema_after_version_upgrade.go index 87d8bce438e0..7d8867d5ebd0 100644 --- a/pkg/cmd/roachtest/tests/validate_system_schema_after_version_upgrade.go +++ b/pkg/cmd/roachtest/tests/validate_system_schema_after_version_upgrade.go @@ -68,7 +68,7 @@ func runValidateSystemSchemaAfterVersionUpgrade( t.Fatal(err) } expected = obtainSystemSchema(ctx, t.L(), c, 1) - c.Wipe(ctx, false /* preserveCerts */, c.All()) + c.Wipe(ctx, true /* preserveCerts */, c.All()) mvt := mixedversion.NewTest(ctx, t, t.L(), c, c.All(), // Fixtures are generated on a version that's too old for this test. diff --git a/pkg/configprofiles/profiles.go b/pkg/configprofiles/profiles.go index 9b9d07669ea7..3c203659b56c 100644 --- a/pkg/configprofiles/profiles.go +++ b/pkg/configprofiles/profiles.go @@ -132,13 +132,14 @@ var virtClusterInitTasks = []autoconfigpb.Task{ var virtClusterWithAppServiceInitTasks = append( virtClusterInitTasks[:len(virtClusterInitTasks):len(virtClusterInitTasks)], makeTask("create an application virtual cluster", - nil, /* nonTxnSQL */ - /* txnSQL */ []string{ + /* noTxnSQL */ []string{ // Create the app tenant record. - "CREATE VIRTUAL CLUSTER application", + "CREATE VIRTUAL CLUSTER IF NOT EXISTS application", // Run the service for the application tenant. 
"ALTER VIRTUAL CLUSTER application START SERVICE SHARED", }, + nil, /* txnSQL */ + ), makeTask("activate application virtual cluster", /* nonTxnSQL */ []string{ diff --git a/pkg/multitenant/mtinfopb/info.go b/pkg/multitenant/mtinfopb/info.go index 9d25fb830fa0..3c3db3e24d00 100644 --- a/pkg/multitenant/mtinfopb/info.go +++ b/pkg/multitenant/mtinfopb/info.go @@ -36,8 +36,13 @@ const ( // This mode causes KV nodes to spontaneously start the SQL service // for the tenant. ServiceModeShared TenantServiceMode = 2 + + // ServiceModeStopping says that the service was previously in + // ServiceModeShared but is in the process of stopping. + ServiceModeStopping TenantServiceMode = 3 + + // MaxServiceMode is a sentinel value. - MaxServiceMode TenantServiceMode = ServiceModeShared + MaxServiceMode TenantServiceMode = ServiceModeStopping ) // String implements fmt.Stringer. @@ -49,6 +54,8 @@ func (s TenantServiceMode) String() string { return "external" case ServiceModeShared: return "shared" + case ServiceModeStopping: + return "stopping" default: return fmt.Sprintf("unimplemented-%d", int(s)) } diff --git a/pkg/multitenant/tenantcapabilities/tenantcapabilitiesauthorizer/authorizer.go b/pkg/multitenant/tenantcapabilities/tenantcapabilitiesauthorizer/authorizer.go index b239620d9c86..e2f7d40d526e 100644 --- a/pkg/multitenant/tenantcapabilities/tenantcapabilitiesauthorizer/authorizer.go +++ b/pkg/multitenant/tenantcapabilities/tenantcapabilitiesauthorizer/authorizer.go @@ -109,7 +109,7 @@ func (a *Authorizer) HasCapabilityForBatch( entry, mode := a.getMode(ctx, tenID) switch mode { case authorizerModeOn: - if entry.ServiceMode == mtinfopb.ServiceModeNone { + if entry.ServiceMode == mtinfopb.ServiceModeNone || entry.ServiceMode == mtinfopb.ServiceModeStopping { return errors.Newf("operation not allowed when in service mode %q", entry.ServiceMode) } return a.capCheckForBatch(ctx, tenID, ba, entry) diff --git a/pkg/roachprod/install/cluster_settings.go 
b/pkg/roachprod/install/cluster_settings.go index 6984f1b32306..3583e97c6d4f 100644 --- a/pkg/roachprod/install/cluster_settings.go +++ b/pkg/roachprod/install/cluster_settings.go @@ -110,7 +110,7 @@ func MakeClusterSettings(opts ...ClusterSettingOption) ClusterSettings { Binary: config.Binary, Tag: "", PGUrlCertsDir: "./certs", - Secure: false, + Secure: true, UseTreeDist: true, Env: config.DefaultEnvVars(), NumRacks: 0, diff --git a/pkg/roachprod/install/cluster_synced.go b/pkg/roachprod/install/cluster_synced.go index 5470e188045a..7c818151cee2 100644 --- a/pkg/roachprod/install/cluster_synced.go +++ b/pkg/roachprod/install/cluster_synced.go @@ -1641,6 +1641,7 @@ fi %[1]s cert create-ca --certs-dir=certs --ca-key=certs/ca.key %[1]s cert create-client root --certs-dir=certs --ca-key=certs/ca.key $TENANT_SCOPE_OPT %[1]s cert create-client testuser --certs-dir=certs --ca-key=certs/ca.key $TENANT_SCOPE_OPT +%[1]s cert create-client roach --certs-dir=certs --ca-key=certs/ca.key $TENANT_SCOPE_OPT %[1]s cert create-node %[2]s --certs-dir=certs --ca-key=certs/ca.key tar cvf %[3]s certs `, cockroachNodeBinary(c, 1), strings.Join(nodeNames, " "), certsTarName) @@ -2577,7 +2578,7 @@ func (c *SyncedCluster) pgurls( if err != nil { return nil, err } - m[node] = c.NodeURL(host, desc.Port, virtualClusterName, desc.ServiceMode) + m[node] = c.NodeURL(host, desc.Port, virtualClusterName, desc.ServiceMode, AuthRootCert) } return m, nil } diff --git a/pkg/roachprod/install/cockroach.go b/pkg/roachprod/install/cockroach.go index b514cef1b5ef..c5126ad060e4 100644 --- a/pkg/roachprod/install/cockroach.go +++ b/pkg/roachprod/install/cockroach.go @@ -485,26 +485,67 @@ func (c *SyncedCluster) CertsDir(node Node) string { return "certs" } +type PGAuthMode int + +const ( + // AuthRootCert authenticates using the root user and root cert + root client certs. + // Root authentication skips verification paths and is not reflective of how real + // users authenticate. 
+ // TODO(darrylwong): Minimize the amount of root user authentication used. + AuthRootCert PGAuthMode = iota + // AuthUserPassword authenticates using the default user and password. Since + // no certs are specified, sslmode is set to allow. Note this form of auth + // only works if sslmode=allow is an option, i.e. cockroach sql. + // AuthUserCert should be used instead most of the time, except when + // certificates don't exist. + AuthUserPassword + // AuthUserCert authenticates using the default user and password, as well + // as the root cert and client certs. This uses sslmode=verify-full and will + // verify that the certificate is valid. This is the preferred mode of + // authentication. + AuthUserCert + + DefaultUser = "roach" + + DefaultPassword = SystemInterfaceName +) + // NodeURL constructs a postgres URL. If sharedTenantName is not empty, it will // be used as the virtual cluster name in the URL. This is used to connect to a // shared process running services for multiple virtual clusters. func (c *SyncedCluster) NodeURL( - host string, port int, virtualClusterName string, serviceMode ServiceMode, + host string, port int, virtualClusterName string, serviceMode ServiceMode, auth PGAuthMode, ) string { var u url.URL - u.User = url.User("root") u.Scheme = "postgres" + u.User = url.User("root") u.Host = fmt.Sprintf("%s:%d", host, port) v := url.Values{} if c.Secure { - v.Add("sslcert", c.PGUrlCertsDir+"/client.root.crt") - v.Add("sslkey", c.PGUrlCertsDir+"/client.root.key") - v.Add("sslrootcert", c.PGUrlCertsDir+"/ca.crt") - v.Add("sslmode", "verify-full") + user := DefaultUser + // TODO(DarrylWong): Support authentication for multitenant, + // since they do not use roach:system. 
+ password := DefaultPassword + + switch auth { + case AuthRootCert: + v.Add("sslcert", fmt.Sprintf("%s/client.root.crt", c.PGUrlCertsDir)) + v.Add("sslkey", fmt.Sprintf("%s/client.root.key", c.PGUrlCertsDir)) + v.Add("sslrootcert", fmt.Sprintf("%s/ca.crt", c.PGUrlCertsDir)) + v.Add("sslmode", "verify-full") + case AuthUserPassword: + u.User = url.UserPassword(user, password) + v.Add("sslmode", "allow") + case AuthUserCert: + u.User = url.UserPassword(user, password) + v.Add("sslcert", fmt.Sprintf("%s/client.%s.crt", c.PGUrlCertsDir, user)) + v.Add("sslkey", fmt.Sprintf("%s/client.%s.key", c.PGUrlCertsDir, user)) + v.Add("sslrootcert", fmt.Sprintf("%s/ca.crt", c.PGUrlCertsDir)) + v.Add("sslmode", "verify-full") + } } else { v.Add("sslmode", "disable") } - // Add the virtual cluster name option explicitly for shared-process // tenants or for the system tenant. This is to make sure we connect // to the system tenant in case we have previously changed the @@ -555,7 +596,7 @@ func (c *SyncedCluster) ExecOrInteractiveSQL( if err != nil { return err } - url := c.NodeURL("localhost", desc.Port, virtualClusterName, desc.ServiceMode) + url := c.NodeURL("localhost", desc.Port, virtualClusterName, desc.ServiceMode, AuthRootCert) binary := cockroachNodeBinary(c, c.Nodes[0]) allArgs := []string{binary, "sql", "--url", url} allArgs = append(allArgs, ssh.Escape(args)) @@ -585,9 +626,8 @@ func (c *SyncedCluster) ExecSQL( cmd = fmt.Sprintf(`cd %s ; `, c.localVMDir(node)) } cmd += cockroachNodeBinary(c, node) + " sql --url " + - c.NodeURL("localhost", desc.Port, virtualClusterName, desc.ServiceMode) + " " + + c.NodeURL("localhost", desc.Port, virtualClusterName, desc.ServiceMode, AuthRootCert) + " " + ssh.Escape(args) - return c.runCmdOnSingleNode(ctx, l, node, cmd, defaultCmdOpts("run-sql")) }) @@ -990,7 +1030,6 @@ func (c *SyncedCluster) createAdminUserForSecureCluster( return } - const username = "roach" // N.B.: although using the same username/password combination would // be 
easier to remember, if we do it for the system interface and // virtual clusters we would be unable to log-in to the virtual @@ -1000,12 +1039,12 @@ func (c *SyncedCluster) createAdminUserForSecureCluster( // the virtual cluster we are connecting to in the console. var password = startOpts.VirtualClusterName if startOpts.VirtualClusterName == "" { - password = SystemInterfaceName + password = DefaultPassword } stmts := strings.Join([]string{ - fmt.Sprintf("CREATE USER IF NOT EXISTS %s WITH LOGIN PASSWORD '%s'", username, password), - fmt.Sprintf("GRANT ADMIN TO %s", username), + fmt.Sprintf("CREATE USER IF NOT EXISTS %s WITH LOGIN PASSWORD '%s'", DefaultUser, password), + fmt.Sprintf("GRANT ADMIN TO %s WITH ADMIN OPTION", DefaultUser), }, "; ") // We retry a few times here because cockroach process might not be @@ -1040,7 +1079,7 @@ func (c *SyncedCluster) createAdminUserForSecureCluster( virtualClusterInfo = fmt.Sprintf(" for virtual cluster %s", startOpts.VirtualClusterName) } - l.Printf("log into DB console%s with user=%s password=%s", virtualClusterInfo, username, password) + l.Printf("log into DB console%s with user=%s password=%s", virtualClusterInfo, DefaultUser, password) } func (c *SyncedCluster) setClusterSettings( @@ -1107,7 +1146,7 @@ func (c *SyncedCluster) generateClusterSettingCmd( if err != nil { return "", err } - url := c.NodeURL("localhost", port, SystemInterfaceName /* virtualClusterName */, ServiceModeShared) + url := c.NodeURL("localhost", port, SystemInterfaceName /* virtualClusterName */, ServiceModeShared, AuthRootCert) // We use `mkdir -p` here since the directory may not exist if an in-memory // store is used. 
@@ -1129,7 +1168,7 @@ func (c *SyncedCluster) generateInitCmd(ctx context.Context, node Node) (string, if err != nil { return "", err } - url := c.NodeURL("localhost", port, SystemInterfaceName /* virtualClusterName */, ServiceModeShared) + url := c.NodeURL("localhost", port, SystemInterfaceName /* virtualClusterName */, ServiceModeShared, AuthRootCert) binary := cockroachNodeBinary(c, node) initCmd += fmt.Sprintf(` if ! test -e %[1]s ; then @@ -1336,7 +1375,7 @@ func (c *SyncedCluster) createFixedBackupSchedule( if err != nil { return err } - url := c.NodeURL("localhost", port, SystemInterfaceName /* virtualClusterName */, ServiceModeShared) + url := c.NodeURL("localhost", port, SystemInterfaceName /* virtualClusterName */, ServiceModeShared, AuthRootCert) fullCmd := fmt.Sprintf(`COCKROACH_CONNECT_TIMEOUT=%d %s sql --url %s -e %q`, startSQLTimeout, binary, url, createScheduleCmd) // Instead of using `c.ExecSQL()`, use `c.runCmdOnSingleNode()`, which allows us to diff --git a/pkg/roachprod/roachprod.go b/pkg/roachprod/roachprod.go index e219ec1fc4c1..8a8e799db96e 100644 --- a/pkg/roachprod/roachprod.go +++ b/pkg/roachprod/roachprod.go @@ -918,6 +918,7 @@ type PGURLOptions struct { External bool VirtualClusterName string SQLInstance int + Auth install.PGAuthMode } // PgURL generates pgurls for the nodes in a cluster. 
@@ -956,7 +957,7 @@ func PgURL( if ip == "" { return nil, errors.Errorf("empty ip: %v", ips) } - urls = append(urls, c.NodeURL(ip, desc.Port, opts.VirtualClusterName, desc.ServiceMode)) + urls = append(urls, c.NodeURL(ip, desc.Port, opts.VirtualClusterName, desc.ServiceMode, opts.Auth)) } if len(urls) != len(nodes) { return nil, errors.Errorf("have nodes %v, but urls %v from ips %v", nodes, urls, ips) diff --git a/pkg/server/server_controller_test.go b/pkg/server/server_controller_test.go index d02a217bd4ce..7dcec13397a5 100644 --- a/pkg/server/server_controller_test.go +++ b/pkg/server/server_controller_test.go @@ -54,7 +54,9 @@ func TestServerController(t *testing.T) { require.Nil(t, d) require.Error(t, err, `no tenant found with name "somename"`) - _, err = db.Exec("CREATE TENANT hello; ALTER TENANT hello START SERVICE SHARED") + _, err = db.Exec("CREATE TENANT hello") + require.NoError(t, err) + _, err = db.Exec("ALTER TENANT hello START SERVICE SHARED") require.NoError(t, err) _, _, err = sc.getServer(ctx, "hello") diff --git a/pkg/server/serverpb/BUILD.bazel b/pkg/server/serverpb/BUILD.bazel index efe784a5eb20..2474d70d6161 100644 --- a/pkg/server/serverpb/BUILD.bazel +++ b/pkg/server/serverpb/BUILD.bazel @@ -26,6 +26,7 @@ proto_library( "//pkg/kv/kvserver/kvserverpb:kvserverpb_proto", "//pkg/kv/kvserver/liveness/livenesspb:livenesspb_proto", "//pkg/kv/kvserver/loqrecovery/loqrecoverypb:loqrecoverypb_proto", + "//pkg/multitenant/mtinfopb:mtinfopb_proto", "//pkg/roachpb:roachpb_proto", "//pkg/server/diagnostics/diagnosticspb:diagnosticspb_proto", "//pkg/server/status/statuspb:statuspb_proto", @@ -67,6 +68,7 @@ go_proto_library( "//pkg/kv/kvserver/kvserverpb", "//pkg/kv/kvserver/liveness/livenesspb", "//pkg/kv/kvserver/loqrecovery/loqrecoverypb", + "//pkg/multitenant/mtinfopb", "//pkg/roachpb", "//pkg/server/diagnostics/diagnosticspb", "//pkg/server/status/statuspb", diff --git a/pkg/server/serverpb/status.go b/pkg/server/serverpb/status.go index 
00c92a435354..1e68ece42efc 100644 --- a/pkg/server/serverpb/status.go +++ b/pkg/server/serverpb/status.go @@ -47,6 +47,7 @@ type SQLStatusServer interface { Logs(context.Context, *LogsRequest) (*LogEntriesResponse, error) NodesUI(context.Context, *NodesRequest) (*NodesResponseExternal, error) RequestJobProfilerExecutionDetails(context.Context, *RequestJobProfilerExecutionDetailsRequest) (*RequestJobProfilerExecutionDetailsResponse, error) + TenantServiceStatus(context.Context, *TenantServiceStatusRequest) (*TenantServiceStatusResponse, error) } // OptionalNodesStatusServer is a StatusServer that is only optionally present diff --git a/pkg/server/serverpb/status.proto b/pkg/server/serverpb/status.proto index 57d09574e94e..c3e4f377eae3 100644 --- a/pkg/server/serverpb/status.proto +++ b/pkg/server/serverpb/status.proto @@ -16,6 +16,7 @@ import "build/info.proto"; import "errorspb/errors.proto"; import "gossip/gossip.proto"; import "jobs/jobspb/jobs.proto"; +import "multitenant/mtinfopb/info.proto"; import "roachpb/data.proto"; import "roachpb/index_usage_stats.proto"; import "roachpb/span_config.proto"; @@ -613,6 +614,25 @@ message DownloadSpanResponse { ]; } +message TenantServiceStatusRequest { + string node_id = 1 [(gogoproto.customname) = "NodeID"]; + uint64 tenant_id = 2 [(gogoproto.customname) = "TenantID"]; +} + +message TenantServiceStatusResponse { + map status_by_node_id = 1 [ + (gogoproto.castkey) = "github.com/cockroachdb/cockroach/pkg/roachpb.NodeID", + (gogoproto.customname) = "StatusByNodeID", + (gogoproto.nullable) = false + ]; + + map errors_by_node_id = 2 [ + (gogoproto.castkey) = "github.com/cockroachdb/cockroach/pkg/roachpb.NodeID", + (gogoproto.customname) = "ErrorsByNodeID", + (gogoproto.nullable) = false + ]; +} + message TraceEvent { google.protobuf.Timestamp time = 1 [ (gogoproto.nullable) = false, (gogoproto.stdtime) = true ]; @@ -2198,6 +2218,15 @@ service Status { }; } + // TenantServiceStatus returns the current service and data state of 
+ // the given tenant as known to the server orchestrator, which may + // differ from the database state. + rpc TenantServiceStatus(TenantServiceStatusRequest) returns (TenantServiceStatusResponse) { + option (google.api.http) = { + get : "/_status/tenant_service_status" + }; + } + // TenantRanges requests internal details about all range replicas within // the tenant's keyspace at the time the request is processed. rpc TenantRanges(TenantRangesRequest) returns (TenantRangesResponse) { diff --git a/pkg/server/status.go b/pkg/server/status.go index a4d9e363c17c..b8e4ec3e736f 100644 --- a/pkg/server/status.go +++ b/pkg/server/status.go @@ -44,6 +44,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator/storepool" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness/livenesspb" + "github.com/cockroachdb/cockroach/pkg/multitenant/mtinfopb" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/rpc" "github.com/cockroachdb/cockroach/pkg/security" @@ -3622,6 +3623,81 @@ func (s *systemStatusServer) SpanStats( return batchedSpanStats(ctx, req, s.getSpanStatsInternal, batchSize) } +func (s *systemStatusServer) TenantServiceStatus( + ctx context.Context, req *serverpb.TenantServiceStatusRequest, +) (*serverpb.TenantServiceStatusResponse, error) { + ctx = authserver.ForwardSQLIdentityThroughRPCCalls(ctx) + ctx = s.AnnotateCtx(ctx) + + resp := &serverpb.TenantServiceStatusResponse{ + StatusByNodeID: make(map[roachpb.NodeID]mtinfopb.SQLInfo), + ErrorsByNodeID: make(map[roachpb.NodeID]string), + } + if len(req.NodeID) > 0 { + reqNodeID, local, err := s.parseNodeID(req.NodeID) + if err != nil { + return nil, status.Errorf(codes.InvalidArgument, err.Error()) + } + + if local { + info, err := s.localTenantServiceStatus(ctx, req) + if err != nil { + return nil, err + } + resp.StatusByNodeID[reqNodeID] = info + return resp, nil + } + return nil, 
errors.AssertionFailedf("requesting service status on a specific node is not supported yet") + } + + // Send TenantServiceStatus request to all stores on all nodes. + remoteRequest := serverpb.TenantServiceStatusRequest{NodeID: "local", TenantID: req.TenantID} + nodeFn := func(ctx context.Context, status serverpb.StatusClient, _ roachpb.NodeID) (*serverpb.TenantServiceStatusResponse, error) { + return status.TenantServiceStatus(ctx, &remoteRequest) + } + responseFn := func(nodeID roachpb.NodeID, remoteResp *serverpb.TenantServiceStatusResponse) { + resp.StatusByNodeID[nodeID] = remoteResp.StatusByNodeID[nodeID] + } + errorFn := func(nodeID roachpb.NodeID, err error) { + resp.ErrorsByNodeID[nodeID] = err.Error() + } + + if err := iterateNodes(ctx, s.serverIterator, s.stopper, "tenant service status", + noTimeout, + s.dialNode, + nodeFn, + responseFn, + errorFn, + ); err != nil { + return nil, srverrors.ServerError(ctx, err) + } + + return resp, nil +} + +func (s *systemStatusServer) localTenantServiceStatus( + ctx context.Context, req *serverpb.TenantServiceStatusRequest, +) (mtinfopb.SQLInfo, error) { + ret := mtinfopb.SQLInfo{} + r, err := s.sqlServer.execCfg.TenantCapabilitiesReader.Get("tenant service status") + if err != nil { + return ret, err + } + tid, err := roachpb.MakeTenantID(req.TenantID) + if err != nil { + return ret, err + } + entry, _, found := r.GetInfo(tid) + if !found { + return ret, nil + } + ret.ID = entry.TenantID.ToUint64() + ret.Name = entry.Name + ret.DataState = entry.DataState + ret.ServiceMode = entry.ServiceMode + return ret, nil +} + // Diagnostics returns an anonymized diagnostics report. 
func (s *statusServer) Diagnostics( ctx context.Context, req *serverpb.DiagnosticsRequest, diff --git a/pkg/sql/catalog/internal/catkv/catalog_reader_cached.go b/pkg/sql/catalog/internal/catkv/catalog_reader_cached.go index 54955647267f..e0357d29fa63 100644 --- a/pkg/sql/catalog/internal/catkv/catalog_reader_cached.go +++ b/pkg/sql/catalog/internal/catkv/catalog_reader_cached.go @@ -200,15 +200,27 @@ func (c *cachedCatalogReader) ScanAll(ctx context.Context, txn *kv.Txn) (nstree. c.hasScanAll = true c.hasScanNamespaceForDatabases = true c.hasScanAllComments = true - for id, s := range c.byIDState { - s.hasScanNamespaceForDatabaseEntries = true - s.hasScanNamespaceForDatabaseSchemas = true - s.hasGetDescriptorEntries = true - c.byIDState[id] = s - } - for ni, s := range c.byNameState { - s.hasGetNamespaceEntries = true - c.byNameState[ni] = s + if err := read.ForEachDescriptor(func(desc catalog.Descriptor) error { + // We must update the byID and byName states for each descriptor that + // was read. 
+ var idState byIDStateValue + var nameState byNameStateValue + idState.hasScanNamespaceForDatabaseEntries = true + idState.hasScanNamespaceForDatabaseSchemas = true + idState.hasGetDescriptorEntries = true + nameState.hasGetNamespaceEntries = true + c.setByIDState(desc.GetID(), idState) + ni := descpb.NameInfo{ + ParentID: desc.GetParentID(), + Name: desc.GetName(), + } + if typ := desc.DescriptorType(); typ != catalog.Database && typ != catalog.Schema { + ni.ParentSchemaID = desc.GetParentSchemaID() + } + c.setByNameState(ni, nameState) + return nil + }); err != nil { + return nstree.Catalog{}, err } return read, nil } diff --git a/pkg/sql/catalog/internal/catkv/testdata/testdata_app b/pkg/sql/catalog/internal/catkv/testdata/testdata_app index 397dc7572247..91374d2eae67 100644 --- a/pkg/sql/catalog/internal/catkv/testdata/testdata_app +++ b/pkg/sql/catalog/internal/catkv/testdata/testdata_app @@ -405,6 +405,15 @@ is_desc_id_known_to_not_exist id=123 ---- true +# Make sure scan_all properly updated the cache. +is_id_in_cache id=107 +---- +true + +is_id_in_cache id=108 +---- +true + # Get* queries involving IDs or names which don't exist after a # ScanAll should bypass storage in the cached CatalogReader. get_by_ids id=456 diff --git a/pkg/sql/catalog/internal/catkv/testdata/testdata_system b/pkg/sql/catalog/internal/catkv/testdata/testdata_system index c8943be4f792..ba4a1c9d6696 100644 --- a/pkg/sql/catalog/internal/catkv/testdata/testdata_system +++ b/pkg/sql/catalog/internal/catkv/testdata/testdata_system @@ -438,6 +438,15 @@ is_desc_id_known_to_not_exist id=123 ---- true +# Make sure scan_all properly updated the cache. +is_id_in_cache id=107 +---- +true + +is_id_in_cache id=108 +---- +true + # Get* queries involving IDs or names which don't exist after a # ScanAll should bypass storage in the cached CatalogReader. 
get_by_ids id=456 diff --git a/pkg/sql/logictest/testdata/logic_test/tenant b/pkg/sql/logictest/testdata/logic_test/tenant index a2c4d965f1b2..b264f0e375c7 100644 --- a/pkg/sql/logictest/testdata/logic_test/tenant +++ b/pkg/sql/logictest/testdata/logic_test/tenant @@ -407,6 +407,9 @@ INSERT INTO system.tenant_usage( ru_burst_limit, ru_refill_rate, ru_current, current_share_sum, total_consumption) VALUES ($tmplid, 0, 0, now(), 11, 22, 33, 44, ''::BYTES); + + +statement ok ALTER TENANT tmpl START SERVICE SHARED; -- will not be copied. # Use it to create a new tenant. @@ -495,8 +498,12 @@ FROM system.tenant_usage WHERE tenant_id = $otherid # Clean up. statement ok -DROP TENANT othertenant; -ALTER TENANT tmpl STOP SERVICE; +DROP TENANT othertenant + +statement ok +ALTER TENANT tmpl STOP SERVICE + +statement ok DROP TENANT tmpl statement ok @@ -542,6 +549,8 @@ RESET CLUSTER SETTING server.controller.default_target_cluster statement ok DROP TENANT noservice; CREATE TENANT withservice; + +statement ok ALTER TENANT withservice START SERVICE SHARED statement ok diff --git a/pkg/sql/tenant_update.go b/pkg/sql/tenant_update.go index 93529835dff1..aa8886950c9c 100644 --- a/pkg/sql/tenant_update.go +++ b/pkg/sql/tenant_update.go @@ -14,10 +14,12 @@ import ( "context" "time" + "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/multitenant" "github.com/cockroachdb/cockroach/pkg/multitenant/mtinfopb" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/server/serverpb" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/isql" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" @@ -27,8 +29,11 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/sessiondata" "github.com/cockroachdb/cockroach/pkg/sql/syntheticprivilege" "github.com/cockroachdb/cockroach/pkg/util/hlc" + 
"github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/log/logcrash" "github.com/cockroachdb/cockroach/pkg/util/protoutil" + "github.com/cockroachdb/cockroach/pkg/util/retry" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/errors" ) @@ -198,7 +203,7 @@ func ActivateTenant( } func (p *planner) setTenantService( - ctx context.Context, info *mtinfopb.TenantInfo, newMode mtinfopb.TenantServiceMode, + ctx context.Context, info *mtinfopb.TenantInfo, targetMode mtinfopb.TenantServiceMode, ) error { if p.EvalContext().TxnReadOnly { return readOnlyError("ALTER VIRTUAL CLUSTER SERVICE") @@ -214,21 +219,167 @@ func (p *planner) setTenantService( return err } - if newMode == info.ServiceMode { - // No-op. Do nothing. - return nil + if !p.extendedEvalCtx.TxnIsSingleStmt { + return errors.Errorf("ALTER VIRTUAL CLUSTER SERVICE cannot be used inside a multi-statement transaction") } - if newMode != mtinfopb.ServiceModeNone && info.ServiceMode != mtinfopb.ServiceModeNone { - return errors.WithHint(pgerror.Newf(pgcode.ObjectNotInPrerequisiteState, - "cannot change service mode %v to %v directly", - info.ServiceMode, newMode), - "Use ALTER VIRTUAL CLUSTER STOP SERVICE first.") + lastMode := info.ServiceMode + for { + mode, err := stepTenantServiceState(ctx, + p.ExecCfg().InternalDB, + p.execCfg.SQLStatusServer, + p.ExecCfg().Settings, info, targetMode) + if err != nil { + return err + } + if targetMode == mode { + return nil + } + if mode == lastMode { + return errors.AssertionFailedf("service mode did not advance from %q", lastMode) + } + } +} + +func stepTenantServiceState( + ctx context.Context, + db *InternalDB, + statusSrv serverpb.SQLStatusServer, + settings *cluster.Settings, + inInfo *mtinfopb.TenantInfo, + targetMode mtinfopb.TenantServiceMode, +) (mtinfopb.TenantServiceMode, error) { + updateTenantMode := func(txn isql.Txn, info *mtinfopb.TenantInfo, mode mtinfopb.TenantServiceMode) error { + // Zero the 
LastRevertTenantTimestamp since + // starting the service invalidates any + // previous revert. + if mode == mtinfopb.ServiceModeShared || mode == mtinfopb.ServiceModeExternal { + info.LastRevertTenantTimestamp = hlc.Timestamp{} + } + log.Infof(ctx, "transitioning tenant %d from %s to %s", info.ID, info.ServiceMode, mode) + info.ServiceMode = mode + return UpdateTenantRecord(ctx, settings, txn, info) + } + + // In 24.1 we support the following state transitions + // + // +-> External -+ + // None-| |-> Stopping + // ^ +-> Shared -+ | + // | | + // +-----------------------+ + // + // NB: We could eliminate some of the error cases here. For + // instance we could support External -> Shared now but still + // return an error in that case. + nextMode := func(currentMode mtinfopb.TenantServiceMode, targetMode mtinfopb.TenantServiceMode) (mtinfopb.TenantServiceMode, error) { + switch currentMode { + case mtinfopb.ServiceModeNone: + switch targetMode { + case mtinfopb.ServiceModeShared, mtinfopb.ServiceModeExternal: + return targetMode, nil + default: + return 0, errors.AssertionFailedf("unsupported service mode transition from %s to %s", currentMode, targetMode) + } + case mtinfopb.ServiceModeShared, mtinfopb.ServiceModeExternal: + switch targetMode { + case mtinfopb.ServiceModeNone: + if settings.Version.IsActive(ctx, clusterversion.V24_1) { + return mtinfopb.ServiceModeStopping, nil + } else { + return mtinfopb.ServiceModeNone, nil + } + case mtinfopb.ServiceModeExternal, mtinfopb.ServiceModeShared: + return 0, errors.WithHint(pgerror.Newf(pgcode.ObjectNotInPrerequisiteState, + "cannot change service mode %v to %v directly", currentMode, targetMode), + "Use ALTER VIRTUAL CLUSTER STOP SERVICE first.") + default: + return 0, errors.AssertionFailedf("unsupported service mode transition from %s to %s", currentMode, targetMode) + } + case mtinfopb.ServiceModeStopping: + switch targetMode { + case mtinfopb.ServiceModeNone: + return targetMode, nil + case 
mtinfopb.ServiceModeShared, mtinfopb.ServiceModeExternal: + return 0, pgerror.Newf(pgcode.ObjectNotInPrerequisiteState, + "service currently stopping, cannot start service until stop has completed") + default: + return 0, errors.AssertionFailedf("unsupported service mode transition from %s to %s", currentMode, targetMode) + } + default: + return 0, errors.AssertionFailedf("unknown service mode %q", currentMode) + } + + } + + var mode mtinfopb.TenantServiceMode + if err := db.Txn(ctx, func(ctx context.Context, txn isql.Txn) error { + tid, err := roachpb.MakeTenantID(inInfo.ID) + if err != nil { + return err + } + + info, err := GetTenantRecordByID(ctx, txn, tid, settings) + if err != nil { + return err + } + + if targetMode == info.ServiceMode { + // No-op. Do nothing. + mode = targetMode + return nil + } + + mode, err = nextMode(info.ServiceMode, targetMode) + if err != nil { + return err + } + return updateTenantMode(txn, info, mode) + }); err != nil { + return mode, err + } + + if mode == mtinfopb.ServiceModeStopping { + // The timeout here is to protect against a scenario + // in which another client also stops the service, + // observes the stopped state first, and then restarts + // the service while we are still waiting for all + // nodes to observe the stopped state. + // + // We could avoid this by treating these transitions + // more like schema changes. 
+ log.Infof(ctx, "waiting for all nodes to stop service for tenant %d", inInfo.ID) + if err := timeutil.RunWithTimeout(ctx, "wait-for-tenant-stop", 10*time.Minute, func(ctx context.Context) error { + retryOpts := retry.Options{MaxBackoff: 10 * time.Second} + for re := retry.StartWithCtx(ctx, retryOpts); re.Next(); { + resp, err := statusSrv.TenantServiceStatus(ctx, &serverpb.TenantServiceStatusRequest{TenantID: inInfo.ID}) + if err != nil { + return errors.Wrap(err, "failed waiting for tenant service status") + } + if len(resp.ErrorsByNodeID) > 0 { + for _, errStr := range resp.ErrorsByNodeID { + return errors.Newf("failed to poll service status on all nodes (first error): %s", errStr) + } + } + allStopped := true + for n, info := range resp.StatusByNodeID { + stoppedOrStopping := info.ServiceMode == mtinfopb.ServiceModeStopping || info.ServiceMode == mtinfopb.ServiceModeNone + if !stoppedOrStopping { + log.Infof(ctx, "tenant %d is still running on node %s", info.ID, n) + allStopped = false + } + } + if allStopped { + break + } + } + return nil + }); err != nil { + return mode, err + } } - info.ServiceMode = newMode - info.LastRevertTenantTimestamp = hlc.Timestamp{} - return UpdateTenantRecord(ctx, p.ExecCfg().Settings, p.InternalSQLTxn(), info) + return mode, nil } func (p *planner) renameTenant( diff --git a/pkg/workload/schemachange/operation_generator.go b/pkg/workload/schemachange/operation_generator.go index 3c8d603eabe2..c2987257578c 100644 --- a/pkg/workload/schemachange/operation_generator.go +++ b/pkg/workload/schemachange/operation_generator.go @@ -2802,7 +2802,12 @@ func (og *operationGenerator) commentOn(ctx context.Context, tx pgx.Tx) (*opStmt } stmt := makeOpStmt(OpStmtDDL) - stmt.sql = fmt.Sprintf(`COMMENT ON %s IS 'comment from the RSW'`, picked) + comment := "comment from the RSW" + if og.params.rng.Float64() < 0.3 { + // Delete the comment with some probability. 
+ comment = "" + } + stmt.sql = fmt.Sprintf(`COMMENT ON %s IS '%s'`, picked, comment) return stmt, nil } diff --git a/pkg/workload/schemachange/optype.go b/pkg/workload/schemachange/optype.go index 7765061b2734..6085b95a382d 100644 --- a/pkg/workload/schemachange/optype.go +++ b/pkg/workload/schemachange/optype.go @@ -279,7 +279,7 @@ var opWeights = []int{ alterTableSetColumnDefault: 1, alterTableSetColumnNotNull: 1, alterTypeDropValue: 0, // Disabled and tracked with #114844, #113859, and #115612. - commentOn: 0, // Disabled and tracked with #116795. + commentOn: 1, createFunction: 1, createIndex: 1, createSchema: 1,