From ff04f3f690fc8abcf8e180d24ad7ed45301a9728 Mon Sep 17 00:00:00 2001 From: Rich Loveland Date: Thu, 8 Mar 2018 16:07:52 -0500 Subject: [PATCH 01/13] Update 'Stop a Node' with more draining info Addresses https://github.com/cockroachdb/docs/issues/2436 --- _includes/settings/v2.0/settings.md | 52 +++++++++++++++++++++++++++++ v2.0/cluster-settings.md | 12 +------ v2.0/stop-a-node.md | 9 ++++- 3 files changed, 61 insertions(+), 12 deletions(-) create mode 100644 _includes/settings/v2.0/settings.md diff --git a/_includes/settings/v2.0/settings.md b/_includes/settings/v2.0/settings.md new file mode 100644 index 00000000000..67a6dab2e4a --- /dev/null +++ b/_includes/settings/v2.0/settings.md @@ -0,0 +1,52 @@ +| SETTING | TYPE | DEFAULT | DESCRIPTION | +|-----------------------------------------------------|-------------------|------------|-------------------------------------------------------------------------------------------------------------------------------------------------| +| `cloudstorage.gs.default.key` | string | `` | if set, JSON key to use during Google Cloud Storage operations | +| `cloudstorage.http.custom_ca` | string | `` | custom root CA (appended to system's default CAs) for verifying certificates when interacting with HTTPS storage | +| `cluster.organization` | string | `` | organization name | +| `debug.panic_on_failed_assertions` | boolean | `false` | panic when an assertion fails rather than reporting | +| `diagnostics.reporting.enabled` | boolean | `true` | enable reporting diagnostic metrics to cockroach labs | +| `diagnostics.reporting.interval` | duration | `1h0m0s` | interval at which diagnostics data should be reported | +| `diagnostics.reporting.send_crash_reports` | boolean | `true` | send crash and panic reports | +| `kv.allocator.lease_rebalancing_aggressiveness` | float | `1E+00` | set greater than 1.0 to rebalance leases toward load more aggressively, or between 0 and 1.0 to be more conservative about rebalancing leases | +| `kv.allocator.load_based_lease_rebalancing.enabled` | boolean | `true` | set to enable rebalancing of range leases based on load and latency | +| `kv.allocator.range_rebalance_threshold` | float | `5E-02` | minimum fraction away from the mean a store's range count can be before it is considered overfull or underfull | +| `kv.allocator.stat_based_rebalancing.enabled` | boolean | `false` | set to enable rebalancing of range replicas based on write load and disk usage | +| `kv.allocator.stat_rebalance_threshold` | float | `2E-01` | minimum fraction away from the mean a store's stats (like disk usage or writes per second) can be before it is considered overfull or underfull | +| `kv.bulk_io_write.max_rate` | byte size | `8.0 EiB` | the rate limit (bytes/sec) to use for writes to disk on behalf of bulk io ops | +| `kv.bulk_sst.sync_size` | byte size | `2.0 MiB` | threshold after which non-Rocks SST writes must fsync (0 disables) | +| `kv.raft.command.max_size` | byte size | `64 MiB` | maximum size of a raft command | +| `kv.raft_log.synchronize` | boolean | `true` | set to true to synchronize on Raft log writes to persistent storage | +| `kv.range.backpressure_range_size_multiplier` | float | `2E+00` | multiple of range_max_bytes that a range is allowed to grow to without splitting before writes to that range are blocked, or 0 to disable | +| `kv.range_descriptor_cache.size` | integer | `1000000` | maximum number of entries in the range descriptor and leaseholder caches | +| `kv.snapshot_rebalance.max_rate` | byte size | `2.0 MiB` | the 
rate limit (bytes/sec) to use for rebalance snapshots | +| `kv.snapshot_recovery.max_rate` | byte size | `8.0 MiB` | the rate limit (bytes/sec) to use for recovery snapshots | +| `kv.transaction.max_intents_bytes` | integer | `256000` | maximum number of bytes used to track write intents in transactions | +| `kv.transaction.max_refresh_spans_bytes` | integer | `256000` | maximum number of bytes used to track refresh spans in serializable transactions | +| `rocksdb.min_wal_sync_interval` | duration | `0s` | minimum duration between syncs of the RocksDB WAL | +| `server.consistency_check.interval` | duration | `24h0m0s` | the time between range consistency checks; set to 0 to disable consistency checking | +| `server.declined_reservation_timeout` | duration | `1s` | the amount of time to consider the store throttled for up-replication after a reservation was declined | +| `server.failed_reservation_timeout` | duration | `5s` | the amount of time to consider the store throttled for up-replication after a failed reservation call | +| `server.remote_debugging.mode` | string | `local` | set to enable remote debugging, localhost-only or disable (any, local, off) | +| `server.shutdown.drain_wait` | duration | `0s` | the amount of time a server waits in an unready state before proceeding with the rest of the shutdown process | +| `server.shutdown.query_wait` | duration | `10s` | the server will wait for at least this amount of time for active queries to finish | +| `server.time_until_store_dead` | duration | `5m0s` | the time after which if there is no new gossiped information about a store, it is considered dead | +| `server.web_session_timeout` | duration | `168h0m0s` | the duration that a newly created web session will be valid | +| `sql.defaults.distsql` | enumeration | `1` | Default distributed SQL execution mode [off = 0, auto = 1, on = 2] | +| `sql.distsql.distribute_index_joins` | boolean | `true` | if set, for index joins we instantiate a join reader on every node that has a stream; if not set, we use a single join reader | +| `sql.distsql.interleaved_joins.enabled` | boolean | `true` | if set we plan interleaved table joins instead of merge joins when possible | +| `sql.distsql.merge_joins.enabled` | boolean | `true` | if set, we plan merge joins when possible | +| `sql.distsql.temp_storage.joins` | boolean | `true` | set to true to enable use of disk for distributed sql joins | +| `sql.distsql.temp_storage.sorts` | boolean | `true` | set to true to enable use of disk for distributed sql sorts | +| `sql.distsql.temp_storage.workmem` | byte size | `64 MiB` | maximum amount of memory in bytes a processor can use before falling back to temp storage | +| `sql.metrics.statement_details.dump_to_logs` | boolean | `false` | dump collected statement statistics to node logs when periodically cleared | +| `sql.metrics.statement_details.enabled` | boolean | `true` | collect per-statement query statistics | +| `sql.metrics.statement_details.threshold` | duration | `0s` | minimum execution time to cause statistics to be collected | +| `sql.trace.log_statement_execute` | boolean | `false` | set to true to enable logging of executed statements | +| `sql.trace.session_eventlog.enabled` | boolean | `false` | set to true to enable session tracing | +| `sql.trace.txn.enable_threshold` | duration | `0s` | duration beyond which all transactions are traced (set to 0 to disable) | +| `timeseries.resolution_10s.storage_duration` | duration | `720h0m0s` | the amount of time to store timeseries data | +| 
`timeseries.storage.enabled` | boolean | `true` | if set, periodic timeseries data is stored within the cluster; disabling is not recommended unless you are storing the data elsewhere | +| `trace.debug.enable` | boolean | `false` | if set, traces for recent requests can be seen in the /debug page | +| `trace.lightstep.token` | string | `` | if set, traces go to Lightstep using this token | +| `trace.zipkin.collector` | string | `` | if set, traces go to the given Zipkin instance (example: '127.0.0.1:9411'); ignored if trace.lightstep.token is set. | +| `version` | custom validation | `2.0` | set the active cluster version in the format '.'. | diff --git a/v2.0/cluster-settings.md b/v2.0/cluster-settings.md index 6ff008e6d73..9897bb697db 100644 --- a/v2.0/cluster-settings.md +++ b/v2.0/cluster-settings.md @@ -22,17 +22,7 @@ They can be updated anytime after a cluster has been started, but only by the `r The following settings can be configured without further input from Cockroach Labs: -| Setting | Description | Value type | Default value | -|---------|-------------|---------------|---------------| -| `diagnostics.reporting.enabled` | Enable automatic reporting of usage data to Cockroach Labs. | Boolean | `true` | -| `diagnostics.reporting.interval` | Interval between automatic reports. **Note that increasing this value will also cause memory usage per node to increase, as the reporting data is collected into RAM.** | Interval | 1 hour | -| `diagnostics.reporting.report_metrics` | Enable collection and reporting of diagnostic metrics. Only applicable if `diagnostics.reporting.enabled` is `true`. | Boolean | `true` | -| `diagnostics.reporting.send_crash_reports` | Enable collection and reporting of node crashes. Only applicable if `diagnostics.reporting.enabled` is `true`. | Boolean | `true` | -| `sql.defaults.distsql` | Define whether new client sessions try to [distribute query execution](https://www.cockroachlabs.com/blog/local-and-distributed-processing-in-cockroachdb/) by default. | Integer | 1 (automatic) | -| `sql.metrics.statement_details.enabled` | Collect per-node, per-statement query statistics, visible in the virtual table `crdb_internal.node_statement_statistics`. | Boolean | `true` | -| `sql.metrics.statement_details.dump_to_logs` | On each node, also copy collected per-statement statistics to the [logging output](debug-and-error-logs.html) when automatic reporting is enabled. | Boolean | `false` | -| `sql.metrics.statement_details.threshold` | Only collect per-statement statistics for statements that run longer than this threshold. | Interval | 0 seconds (all statements) | -| `sql.trace.log_statement_execute` | On each node, copy all executed statements to the [logging output](debug-and-error-logs.html). | Boolean | `false` | +{% include settings/v2.0/settings.md %} - ## View Current Cluster Settings Use the [`SHOW CLUSTER SETTING`](show-cluster-setting.html) statement. From 3df0393155a620a9c853a0f462535079f85f9aad Mon Sep 17 00:00:00 2001 From: Rich Loveland Date: Tue, 13 Mar 2018 15:22:35 -0400 Subject: [PATCH 04/13] Update cluster setting language based on feedback --- v2.0/stop-a-node.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v2.0/stop-a-node.md b/v2.0/stop-a-node.md index 8b9211551cf..bd33bd3afe2 100644 --- a/v2.0/stop-a-node.md +++ b/v2.0/stop-a-node.md @@ -16,7 +16,7 @@ For information about permanently removing nodes to downsize a cluster or react When you stop a node, it performs the following steps: -- Finishes in-flight requests. 
Note that this is a best effort that times out at the `server.shutdown.query_wait` [cluster setting](cluster-settings.html). +- Finishes in-flight requests. Note that this is a best effort that times out after the duration specified by the `server.shutdown.query_wait` [cluster setting](cluster-settings.html). - Transfers all *range leases* and Raft leadership to other nodes. - Gossips its draining state to the cluster, so that other nodes do not try to distribute query planning to the draining node, and no leases are transferred to the draining node. Note that this is best effort that times out at the `server.shutdown.drain_wait` [cluster setting](cluster-settings.html), so other nodes may not receive the gossip info in time. - No new ranges are transferred to the draining node, to avoid a possible loss of quorum after the node shuts down. From 948ef351e192b4038a438420f19818a80dc857f3 Mon Sep 17 00:00:00 2001 From: Rich Loveland Date: Tue, 13 Mar 2018 15:29:05 -0400 Subject: [PATCH 05/13] One space after a period; it's "a" best effort --- v2.0/stop-a-node.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v2.0/stop-a-node.md b/v2.0/stop-a-node.md index bd33bd3afe2..bec844868d3 100644 --- a/v2.0/stop-a-node.md +++ b/v2.0/stop-a-node.md @@ -18,7 +18,7 @@ When you stop a node, it performs the following steps: - Finishes in-flight requests. Note that this is a best effort that times out after the duration specified by the `server.shutdown.query_wait` [cluster setting](cluster-settings.html). - Transfers all *range leases* and Raft leadership to other nodes. -- Gossips its draining state to the cluster, so that other nodes do not try to distribute query planning to the draining node, and no leases are transferred to the draining node. Note that this is best effort that times out at the `server.shutdown.drain_wait` [cluster setting](cluster-settings.html), so other nodes may not receive the gossip info in time. +- Gossips its draining state to the cluster, so that other nodes do not try to distribute query planning to the draining node, and no leases are transferred to the draining node. Note that this is a best effort that times out at the `server.shutdown.drain_wait` [cluster setting](cluster-settings.html), so other nodes may not receive the gossip info in time. - No new ranges are transferred to the draining node, to avoid a possible loss of quorum after the node shuts down. If the node then stays offline for a certain amount of time (5 minutes by default), the cluster considers the node dead and starts to transfer its **range replicas** to other nodes as well. From eb144666b034eb003da8d316ef3ff67a630f1b79 Mon Sep 17 00:00:00 2001 From: Rich Loveland Date: Tue, 13 Mar 2018 15:32:20 -0400 Subject: [PATCH 06/13] Update cluster setting duration language --- v2.0/stop-a-node.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v2.0/stop-a-node.md b/v2.0/stop-a-node.md index bec844868d3..cd116857999 100644 --- a/v2.0/stop-a-node.md +++ b/v2.0/stop-a-node.md @@ -18,7 +18,7 @@ When you stop a node, it performs the following steps: - Finishes in-flight requests. Note that this is a best effort that times out after the duration specified by the `server.shutdown.query_wait` [cluster setting](cluster-settings.html). - Transfers all *range leases* and Raft leadership to other nodes. -- Gossips its draining state to the cluster, so that other nodes do not try to distribute query planning to the draining node, and no leases are transferred to the draining node. 
Note that this is a best effort that times out at the `server.shutdown.drain_wait` [cluster setting](cluster-settings.html), so other nodes may not receive the gossip info in time. +- Gossips its draining state to the cluster, so that other nodes do not try to distribute query planning to the draining node, and no leases are transferred to the draining node. Note that this is a best effort that times out after the duration specified by the `server.shutdown.drain_wait` [cluster setting](cluster-settings.html), so other nodes may not receive the gossip info in time. - No new ranges are transferred to the draining node, to avoid a possible loss of quorum after the node shuts down. If the node then stays offline for a certain amount of time (5 minutes by default), the cluster considers the node dead and starts to transfer its **range replicas** to other nodes as well. From 64e718fa417ecea36c738f0e6850db8585e9973f Mon Sep 17 00:00:00 2001 From: Rich Loveland Date: Wed, 14 Mar 2018 11:11:49 -0400 Subject: [PATCH 07/13] First crack at 1.1.{5,6} shutdown updates --- v1.1/stop-a-node.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/v1.1/stop-a-node.md b/v1.1/stop-a-node.md index ac9737b95a3..a1746b2bd7f 100644 --- a/v1.1/stop-a-node.md +++ b/v1.1/stop-a-node.md @@ -14,6 +14,11 @@ For information about permanently removing nodes to downsize a cluster or react ### How It Works +- Finishes in-flight requests. Note that this is a best effort that times out after the duration specified by the `???` cluster setting (1.1.5) +- Transfers all *range leases* and Raft leadership to other nodes. (1.1.6) +- Gossips its draining state to the cluster so that no leases are transferred to the draining node. Note that this is a best effort that times out after the duration specified by the `???` cluster setting, so other nodes may not receive the gossip info in time. (1.1.6) +- No new ranges are transferred to the draining node, to avoid a possible loss of quorum after the node shuts down. (1.1.5) + When you stop a node, CockroachDB lets the node finish in-flight requests and transfers all **range leases** off the node before shutting it down. If the node then stays offline for a certain amount of time (5 minutes by default), the cluster considers the node dead and starts to transfer its **range replicas** to other nodes as well. After that, if the node comes back online, its range replicas will determine whether or not they are still valid members of replica groups. If a range replica is still valid and any data in its range has changed, it will receive updates from another replica in the group. If a range replica is no longer valid, it will be removed from the node. From 14139e9bf4e8204750d450ab73f9c1fcc31d5a48 Mon Sep 17 00:00:00 2001 From: Rich Loveland Date: Thu, 15 Mar 2018 10:50:13 -0400 Subject: [PATCH 08/13] Clarify that node cancels all current sessions --- v1.1/stop-a-node.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v1.1/stop-a-node.md b/v1.1/stop-a-node.md index a1746b2bd7f..16037018141 100644 --- a/v1.1/stop-a-node.md +++ b/v1.1/stop-a-node.md @@ -14,7 +14,7 @@ For information about permanently removing nodes to downsize a cluster or react ### How It Works -- Finishes in-flight requests. Note that this is a best effort that times out after the duration specified by the `???` cluster setting (1.1.5) +- Cancels all current sessions without waiting. - Transfers all *range leases* and Raft leadership to other nodes. 
(1.1.6) - Gossips its draining state to the cluster so that no leases are transferred to the draining node. Note that this is a best effort that times out after the duration specified by the `???` cluster setting, so other nodes may not receive the gossip info in time. (1.1.6) - No new ranges are transferred to the draining node, to avoid a possible loss of quorum after the node shuts down. (1.1.5) From 76a56222df363f3bf2bf63b0ee883342dc258b11 Mon Sep 17 00:00:00 2001 From: Rich Loveland Date: Thu, 15 Mar 2018 10:56:28 -0400 Subject: [PATCH 09/13] Remove version # from range leases bullet --- v1.1/stop-a-node.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v1.1/stop-a-node.md b/v1.1/stop-a-node.md index 16037018141..dc2c3238cfc 100644 --- a/v1.1/stop-a-node.md +++ b/v1.1/stop-a-node.md @@ -15,7 +15,7 @@ For information about permanently removing nodes to downsize a cluster or react ### How It Works - Cancels all current sessions without waiting. -- Transfers all *range leases* and Raft leadership to other nodes. (1.1.6) +- Transfers all *range leases* and Raft leadership to other nodes. - Gossips its draining state to the cluster so that no leases are transferred to the draining node. Note that this is a best effort that times out after the duration specified by the `???` cluster setting, so other nodes may not receive the gossip info in time. (1.1.6) - No new ranges are transferred to the draining node, to avoid a possible loss of quorum after the node shuts down. (1.1.5) From 6c72d46e92a5659fe6d70e18e2bbc618b573fc25 Mon Sep 17 00:00:00 2001 From: Rich Loveland Date: Thu, 15 Mar 2018 11:00:34 -0400 Subject: [PATCH 10/13] Remove version # from quorum note --- v1.1/stop-a-node.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v1.1/stop-a-node.md b/v1.1/stop-a-node.md index dc2c3238cfc..0da4c90b998 100644 --- a/v1.1/stop-a-node.md +++ b/v1.1/stop-a-node.md @@ -17,7 +17,7 @@ For information about permanently removing nodes to downsize a cluster or react - Cancels all current sessions without waiting. - Transfers all *range leases* and Raft leadership to other nodes. - Gossips its draining state to the cluster so that no leases are transferred to the draining node. Note that this is a best effort that times out after the duration specified by the `???` cluster setting, so other nodes may not receive the gossip info in time. (1.1.6) -- No new ranges are transferred to the draining node, to avoid a possible loss of quorum after the node shuts down. (1.1.5) +- No new ranges are transferred to the draining node, to avoid a possible loss of quorum after the node shuts down. When you stop a node, CockroachDB lets the node finish in-flight requests and transfers all **range leases** off the node before shutting it down. If the node then stays offline for a certain amount of time (5 minutes by default), the cluster considers the node dead and starts to transfer its **range replicas** to other nodes as well. 
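The "certain amount of time (5 minutes by default)" mentioned in the paragraph above corresponds to the `server.time_until_store_dead` setting listed in the v2.0 settings table earlier in this series. A minimal sketch of inspecting and raising that window, assuming an insecure local cluster and a purely illustrative `10m` value:

```shell
# Sketch only: the setting name comes from the settings table above;
# --insecure and the 10m value are illustrative assumptions.
cockroach sql --insecure -e "SHOW CLUSTER SETTING server.time_until_store_dead"
cockroach sql --insecure -e "SET CLUSTER SETTING server.time_until_store_dead = '10m'"
```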
From d9d22a13efe08cf98fccad6ef9c44b846cf9c9d9 Mon Sep 17 00:00:00 2001 From: Rich Loveland Date: Thu, 15 Mar 2018 15:43:59 -0400 Subject: [PATCH 11/13] Remove duped info from para following list --- v1.1/stop-a-node.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v1.1/stop-a-node.md b/v1.1/stop-a-node.md index 0da4c90b998..2356596e3e2 100644 --- a/v1.1/stop-a-node.md +++ b/v1.1/stop-a-node.md @@ -19,7 +19,7 @@ For information about permanently removing nodes to downsize a cluster or react - Gossips its draining state to the cluster so that no leases are transferred to the draining node. Note that this is a best effort that times out after the duration specified by the `???` cluster setting, so other nodes may not receive the gossip info in time. (1.1.6) - No new ranges are transferred to the draining node, to avoid a possible loss of quorum after the node shuts down. -When you stop a node, CockroachDB lets the node finish in-flight requests and transfers all **range leases** off the node before shutting it down. If the node then stays offline for a certain amount of time (5 minutes by default), the cluster considers the node dead and starts to transfer its **range replicas** to other nodes as well. +If the node then stays offline for a certain amount of time (5 minutes by default), the cluster considers the node dead and starts to transfer its **range replicas** to other nodes as well. After that, if the node comes back online, its range replicas will determine whether or not they are still valid members of replica groups. If a range replica is still valid and any data in its range has changed, it will receive updates from another replica in the group. If a range replica is no longer valid, it will be removed from the node. From 094d164c56494a2366787b15985231649a0e6861 Mon Sep 17 00:00:00 2001 From: Rich Loveland Date: Mon, 19 Mar 2018 11:22:19 -0400 Subject: [PATCH 12/13] Update gossiped draining state note via feedback --- v1.1/stop-a-node.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v1.1/stop-a-node.md b/v1.1/stop-a-node.md index 2356596e3e2..578c77ebbbc 100644 --- a/v1.1/stop-a-node.md +++ b/v1.1/stop-a-node.md @@ -16,7 +16,7 @@ For information about permanently removing nodes to downsize a cluster or react - Cancels all current sessions without waiting. - Transfers all *range leases* and Raft leadership to other nodes. -- Gossips its draining state to the cluster so that no leases are transferred to the draining node. Note that this is a best effort that times out after the duration specified by the `???` cluster setting, so other nodes may not receive the gossip info in time. (1.1.6) +- Gossips its draining state to the cluster so that no leases are transferred to the draining node. Note that this is a best effort, so other nodes may not receive the gossip info in time. - No new ranges are transferred to the draining node, to avoid a possible loss of quorum after the node shuts down. If the node then stays offline for a certain amount of time (5 minutes by default), the cluster considers the node dead and starts to transfer its **range replicas** to other nodes as well. 
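The v2.0 drain behavior described in these patches is governed by two timeouts, `server.shutdown.query_wait` and `server.shutdown.drain_wait`, both listed in the settings table above. A minimal sketch of tuning them ahead of a planned shutdown, assuming an insecure cluster and purely illustrative values:

```shell
# Sketch only: both setting names appear in the settings table above;
# --insecure and the duration values are illustrative assumptions.
cockroach sql --insecure -e "SET CLUSTER SETTING server.shutdown.query_wait = '30s'"
cockroach sql --insecure -e "SET CLUSTER SETTING server.shutdown.drain_wait = '5s'"
```

A larger `query_wait` gives in-flight queries more time to finish before the node stops waiting on them, while `drain_wait` controls how long the node sits in an unready state before continuing with the rest of the shutdown.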
From bb14e1bcae2a9bdd83f806436b1d732ef75baf25 Mon Sep 17 00:00:00 2001 From: Rich Loveland Date: Mon, 19 Mar 2018 13:56:52 -0400 Subject: [PATCH 13/13] Make italic text bold --- v1.1/stop-a-node.md | 2 +- v2.0/stop-a-node.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/v1.1/stop-a-node.md b/v1.1/stop-a-node.md index 578c77ebbbc..f69a1e6a484 100644 --- a/v1.1/stop-a-node.md +++ b/v1.1/stop-a-node.md @@ -15,7 +15,7 @@ For information about permanently removing nodes to downsize a cluster or react ### How It Works - Cancels all current sessions without waiting. -- Transfers all *range leases* and Raft leadership to other nodes. +- Transfers all **range leases** and Raft leadership to other nodes. - Gossips its draining state to the cluster so that no leases are transferred to the draining node. Note that this is a best effort, so other nodes may not receive the gossip info in time. - No new ranges are transferred to the draining node, to avoid a possible loss of quorum after the node shuts down. diff --git a/v2.0/stop-a-node.md b/v2.0/stop-a-node.md index cd116857999..04d04473e22 100644 --- a/v2.0/stop-a-node.md +++ b/v2.0/stop-a-node.md @@ -17,7 +17,7 @@ For information about permanently removing nodes to downsize a cluster or react When you stop a node, it performs the following steps: - Finishes in-flight requests. Note that this is a best effort that times out after the duration specified by the `server.shutdown.query_wait` [cluster setting](cluster-settings.html). -- Transfers all *range leases* and Raft leadership to other nodes. +- Transfers all **range leases** and Raft leadership to other nodes. - Gossips its draining state to the cluster, so that other nodes do not try to distribute query planning to the draining node, and no leases are transferred to the draining node. Note that this is a best effort that times out after the duration specified by the `server.shutdown.drain_wait` [cluster setting](cluster-settings.html), so other nodes may not receive the gossip info in time. - No new ranges are transferred to the draining node, to avoid a possible loss of quorum after the node shuts down.
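The draining sequence documented above is what runs when a node is stopped gracefully. A minimal sketch of initiating that shutdown from the command line, assuming an insecure cluster and a placeholder address:

```shell
# Sketch only: --insecure and the host value are placeholders for this example.
cockroach quit --insecure --host=<address of the node to stop>
```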