From 147552f7a42c8c90ba2d9aa7186119949f4ceb4e Mon Sep 17 00:00:00 2001
From: Cameron Nunez <cameron@cockroachlabs.com>
Date: Mon, 7 Feb 2022 11:47:18 -0500
Subject: [PATCH] server: enhance comments surrounding draining

This patch improves the comments related to the draining process,
fixing incorrect statements and providing more details to make
the process more easily understood.

Release note: None
---
 .../settings/settings-for-tenants.txt         |  6 +--
 docs/generated/settings/settings.html         |  6 +--
 pkg/kv/kvserver/store.go                      |  9 ++--
 pkg/server/admin.go                           |  2 +-
 pkg/server/admin_test.go                      |  4 +-
 pkg/server/drain.go                           | 45 ++++++++++++-------
 pkg/server/server_sql.go                      | 11 +++--
 pkg/server/tenant_admin.go                    |  2 +-
 pkg/sql/pgwire/server.go                      | 10 ++---
 9 files changed, 57 insertions(+), 38 deletions(-)

diff --git a/docs/generated/settings/settings-for-tenants.txt b/docs/generated/settings/settings-for-tenants.txt
index c665a1c7ccb4..cf8543ce2634 100644
--- a/docs/generated/settings/settings-for-tenants.txt
+++ b/docs/generated/settings/settings-for-tenants.txt
@@ -60,9 +60,9 @@ server.oidc_authentication.provider_url	string		sets OIDC provider URL ({provide
 server.oidc_authentication.redirect_url	string	https://localhost:8080/oidc/v1/callback	sets OIDC redirect URL via a URL string or a JSON string containing a required `redirect_urls` key with an object that maps from region keys to URL strings (URLs should point to your load balancer and must route to the path /oidc/v1/callback) 
 server.oidc_authentication.scopes	string	openid	sets OIDC scopes to include with authentication request (space delimited list of strings, required to start with `openid`)
 server.rangelog.ttl	duration	720h0m0s	if nonzero, range log entries older than this duration are deleted every 10m0s. Should not be lowered below 24 hours.
-server.shutdown.drain_wait	duration	0s	the amount of time a server waits in an unready state before proceeding with the rest of the shutdown process (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting)
-server.shutdown.lease_transfer_wait	duration	5s	the amount of time a server waits to transfer range leases before proceeding with the rest of the shutdown process (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting)
-server.shutdown.query_wait	duration	10s	the server will wait for at least this amount of time for active queries to finish (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting)
+server.shutdown.drain_wait	duration	0s	the amount of time a server waits in an unready state before proceeding with a drain (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting)
+server.shutdown.lease_transfer_wait	duration	5s	the timeout for a single iteration of the range lease transfer phase of draining (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting)
+server.shutdown.query_wait	duration	10s	the timeout for waiting for active queries to finish during a drain (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting)
 server.time_until_store_dead	duration	5m0s	the time after which if there is no new gossiped information about a store, it is considered dead
 server.user_login.cert_password_method.auto_scram_promotion.enabled	boolean	true	whether to automatically promote cert-password authentication to use SCRAM
 server.user_login.min_password_length	integer	1	the minimum length accepted for passwords set in cleartext via SQL. Note that a value lower than 1 is ignored: passwords cannot be empty in any case.
diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html
index 4f1e1b6f9ad7..5595bcf2481c 100644
--- a/docs/generated/settings/settings.html
+++ b/docs/generated/settings/settings.html
@@ -71,9 +71,9 @@
 <tr><td><code>server.oidc_authentication.redirect_url</code></td><td>string</td><td><code>https://localhost:8080/oidc/v1/callback</code></td><td>sets OIDC redirect URL via a URL string or a JSON string containing a required `redirect_urls` key with an object that maps from region keys to URL strings (URLs should point to your load balancer and must route to the path /oidc/v1/callback) </td></tr>
 <tr><td><code>server.oidc_authentication.scopes</code></td><td>string</td><td><code>openid</code></td><td>sets OIDC scopes to include with authentication request (space delimited list of strings, required to start with `openid`)</td></tr>
 <tr><td><code>server.rangelog.ttl</code></td><td>duration</td><td><code>720h0m0s</code></td><td>if nonzero, range log entries older than this duration are deleted every 10m0s. Should not be lowered below 24 hours.</td></tr>
-<tr><td><code>server.shutdown.drain_wait</code></td><td>duration</td><td><code>0s</code></td><td>the amount of time a server waits in an unready state before proceeding with the rest of the shutdown process (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting)</td></tr>
-<tr><td><code>server.shutdown.lease_transfer_wait</code></td><td>duration</td><td><code>5s</code></td><td>the amount of time a server waits to transfer range leases before proceeding with the rest of the shutdown process (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting)</td></tr>
-<tr><td><code>server.shutdown.query_wait</code></td><td>duration</td><td><code>10s</code></td><td>the server will wait for at least this amount of time for active queries to finish (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting)</td></tr>
+<tr><td><code>server.shutdown.drain_wait</code></td><td>duration</td><td><code>0s</code></td><td>the amount of time a server waits in an unready state before proceeding with a drain (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting)</td></tr>
+<tr><td><code>server.shutdown.lease_transfer_wait</code></td><td>duration</td><td><code>5s</code></td><td>the timeout for a single iteration of the range lease transfer phase of draining (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting)</td></tr>
+<tr><td><code>server.shutdown.query_wait</code></td><td>duration</td><td><code>10s</code></td><td>the timeout for waiting for active queries to finish during a drain (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting)</td></tr>
 <tr><td><code>server.time_until_store_dead</code></td><td>duration</td><td><code>5m0s</code></td><td>the time after which if there is no new gossiped information about a store, it is considered dead</td></tr>
 <tr><td><code>server.user_login.cert_password_method.auto_scram_promotion.enabled</code></td><td>boolean</td><td><code>true</code></td><td>whether to automatically promote cert-password authentication to use SCRAM</td></tr>
 <tr><td><code>server.user_login.min_password_length</code></td><td>integer</td><td><code>1</code></td><td>the minimum length accepted for passwords set in cleartext via SQL. Note that a value lower than 1 is ignored: passwords cannot be empty in any case.</td></tr>
diff --git a/pkg/kv/kvserver/store.go b/pkg/kv/kvserver/store.go
index 9ad8d59a8ed5..63de91819a50 100644
--- a/pkg/kv/kvserver/store.go
+++ b/pkg/kv/kvserver/store.go
@@ -188,13 +188,12 @@ var queueAdditionOnSystemConfigUpdateBurst = settings.RegisterIntSetting(
 	settings.NonNegativeInt,
 )
 
-// leaseTransferWait limits the amount of time a drain command waits for lease
-// and Raft leadership transfers.
+// leaseTransferWait is the timeout for a single iteration of draining range leases.
 var leaseTransferWait = func() *settings.DurationSetting {
 	s := settings.RegisterDurationSetting(
 		settings.TenantWritable,
 		leaseTransferWaitSettingName,
-		"the amount of time a server waits to transfer range leases before proceeding with the rest of the shutdown process "+
+		"the timeout for a single iteration of the range lease transfer phase of draining "+
 			"(note that the --drain-wait parameter for cockroach node drain may need adjustment "+
 			"after changing this setting)",
 		5*time.Second,
@@ -1355,6 +1354,10 @@ func (s *Store) AnnotateCtx(ctx context.Context) context.Context {
 // range leases, and attempts to transfer away any leases owned.
 // When called with 'false', returns to the normal mode of operation.
 //
+// Note: this code represents ONE round of draining. This code is iterated on
+// indefinitely until all leases are transferred away.
+// This iteration can be found here: pkg/cli/start.go, pkg/cli/quit.go.
+//
 // The reporter callback, if non-nil, is called on a best effort basis
 // to report work that needed to be done and which may or may not have
 // been done by the time this call returns. See the explanation in
diff --git a/pkg/server/admin.go b/pkg/server/admin.go
index a384e0f66d13..7e5eab421e49 100644
--- a/pkg/server/admin.go
+++ b/pkg/server/admin.go
@@ -1811,7 +1811,7 @@ func (s *adminServer) checkReadinessForHealthCheck(ctx context.Context) error {
 		return status.Errorf(codes.Unavailable, "node is shutting down")
 	}
 
-	if !s.server.sqlServer.acceptingClients.Get() {
+	if !s.server.sqlServer.isReady.Get() {
 		return status.Errorf(codes.Unavailable, "node is not accepting SQL clients")
 	}
 
diff --git a/pkg/server/admin_test.go b/pkg/server/admin_test.go
index f3819e377c9e..3123b8cc2169 100644
--- a/pkg/server/admin_test.go
+++ b/pkg/server/admin_test.go
@@ -1379,13 +1379,13 @@ func TestHealthAPI(t *testing.T) {
 	})
 
 	// Make the SQL listener appear unavailable. Verify that health fails after that.
-	ts.sqlServer.acceptingClients.Set(false)
+	ts.sqlServer.isReady.Set(false)
 	var resp serverpb.HealthResponse
 	err := getAdminJSONProto(s, "health?ready=1", &resp)
 	if err == nil {
 		t.Error("server appears ready even though SQL listener is not")
 	}
-	ts.sqlServer.acceptingClients.Set(true)
+	ts.sqlServer.isReady.Set(true)
 	err = getAdminJSONProto(s, "health?ready=1", &resp)
 	if err != nil {
 		t.Errorf("server not ready after SQL listener is ready again: %v", err)
diff --git a/pkg/server/drain.go b/pkg/server/drain.go
index ab258f6c1106..aee0cf46b08c 100644
--- a/pkg/server/drain.go
+++ b/pkg/server/drain.go
@@ -33,7 +33,7 @@ var (
 	queryWait = settings.RegisterDurationSetting(
 		settings.TenantWritable,
 		"server.shutdown.query_wait",
-		"the server will wait for at least this amount of time for active queries to finish "+
+		"the timeout for waiting for active queries to finish during a drain "+
 			"(note that the --drain-wait parameter for cockroach node drain may need adjustment "+
 			"after changing this setting)",
 		10*time.Second,
@@ -42,8 +42,7 @@ var (
 	drainWait = settings.RegisterDurationSetting(
 		settings.TenantWritable,
 		"server.shutdown.drain_wait",
-		"the amount of time a server waits in an unready state before proceeding with the rest "+
-			"of the shutdown process "+
+		"the amount of time a server waits in an unready state before proceeding with a drain "+
 			"(note that the --drain-wait parameter for cockroach node drain may need adjustment "+
 			"after changing this setting)",
 		0*time.Second,
@@ -231,6 +230,10 @@ func delegateDrain(
 }
 
 // runDrain idempotently activates the draining mode.
+// Note: this represents ONE round of draining. This code is iterated on
+// indefinitely until all range leases have been drained.
+// This iteration can be found here: pkg/cli/start.go, pkg/cli/quit.go.
+//
 // Note: new code should not be taught to use this method
 // directly. Use the Drain() RPC instead with a suitably crafted
 // DrainRequest.
@@ -279,17 +282,17 @@ func (s *drainServer) runDrain(
 func (s *drainServer) drainInner(
 	ctx context.Context, reporter func(int, redact.SafeString), verbose bool,
 ) (err error) {
-	// First drain all clients and SQL leases.
+	// Drain the SQL layer.
+	// Drains all SQL connections, distributed SQL execution flows, and SQL table leases.
 	if err = s.drainClients(ctx, reporter); err != nil {
 		return err
 	}
-	// Finally, mark the node as draining in liveness and drain the
-	// range leases.
+	// Mark the node as draining in liveness and drain all range leases.
 	return s.drainNode(ctx, reporter, verbose)
 }
 
-// isDraining returns true if either clients are being drained
-// or one of the stores on the node is not accepting replicas.
+// isDraining returns true if either SQL client connections are being drained
+// or if one of the stores on the node is not accepting replicas.
 func (s *drainServer) isDraining() bool {
 	return s.sqlServer.pgServer.IsDraining() || (s.kvServer.node != nil && s.kvServer.node.IsDraining())
 }
@@ -299,26 +302,34 @@ func (s *drainServer) drainClients(
 	ctx context.Context, reporter func(int, redact.SafeString),
 ) error {
 	shouldDelayDraining := !s.isDraining()
-	// Mark the server as draining in a way that probes to
-	// /health?ready=1 will notice.
+
+	// Set the gRPC mode of the node to "draining" and mark the node as "not ready".
+	// Probes to /health?ready=1 will now notice the change in the node's readiness.
 	s.grpc.setMode(modeDraining)
-	s.sqlServer.acceptingClients.Set(false)
-	// Wait for drainUnreadyWait. This will fail load balancer checks and
-	// delay draining so that client traffic can move off this node.
+	s.sqlServer.isReady.Set(false)
+
+	// Wait the duration of drainWait.
+	// This will fail load balancer checks and delay draining so that client
+	// traffic can move off this node.
 	// Note delay only happens on first call to drain.
 	if shouldDelayDraining {
 		s.drainSleepFn(drainWait.Get(&s.sqlServer.execCfg.Settings.SV))
 	}
 
-	// Disable incoming SQL clients up to the queryWait timeout.
+	// Drain all SQL connections.
+	// The queryWait duration is a timeout for waiting on clients
+	// to self-disconnect. If the timeout is reached, any remaining connections
+	// will be closed.
 	queryMaxWait := queryWait.Get(&s.sqlServer.execCfg.Settings.SV)
 	if err := s.sqlServer.pgServer.Drain(ctx, queryMaxWait, reporter); err != nil {
 		return err
 	}
-	// Stop ongoing SQL execution up to the queryWait timeout.
+
+	// Drain all distributed SQL execution flows.
+	// The queryWait duration is used to wait on currently running flows to finish.
 	s.sqlServer.distSQLServer.Drain(ctx, queryMaxWait, reporter)
 
-	// Drain the SQL leases. This must be done after the pgServer has
+	// Drain all SQL table leases. This must be done after the pgServer has
 	// given sessions a chance to finish ongoing work.
 	s.sqlServer.leaseMgr.SetDraining(ctx, true /* drain */, reporter)
 
@@ -335,8 +346,10 @@ func (s *drainServer) drainNode(
 		// No KV subsystem. Nothing to do.
 		return nil
 	}
+	// Set the node's liveness status to "draining".
 	if err = s.kvServer.nodeLiveness.SetDraining(ctx, true /* drain */, reporter); err != nil {
 		return err
 	}
+	// Mark the stores of the node as "draining" and drain all range leases.
 	return s.kvServer.node.SetDraining(true /* drain */, reporter, verbose)
 }
diff --git a/pkg/server/server_sql.go b/pkg/server/server_sql.go
index 78756794a7d6..613955b378ed 100644
--- a/pkg/server/server_sql.go
+++ b/pkg/server/server_sql.go
@@ -157,9 +157,12 @@ type SQLServer struct {
 	// SQL listeners in AcceptClients().
 	connManager netutil.Server
 
-	// set to true when the server has started accepting client conns.
-	// Used by health checks.
-	acceptingClients syncutil.AtomicBool
+	// isReady is the health status of the node. When true, the node is healthy;
+	// load balancers and connection management tools treat the node as "ready".
+	// When false, the node is unhealthy or "not ready", with load balancers and
+	// connection management tools learning this status from health checks.
+	// This is set to true when the server has started accepting client conns.
+	isReady syncutil.AtomicBool
 }
 
 // sqlServerOptionalKVArgs are the arguments supplied to newSQLServer which are
@@ -1332,7 +1335,7 @@ func (s *SQLServer) startServeSQL(
 		}
 	}
 
-	s.acceptingClients.Set(true)
+	s.isReady.Set(true)
 
 	return nil
 }
diff --git a/pkg/server/tenant_admin.go b/pkg/server/tenant_admin.go
index c7d6f68637bf..9511fc7ccba0 100644
--- a/pkg/server/tenant_admin.go
+++ b/pkg/server/tenant_admin.go
@@ -92,7 +92,7 @@ func (t *tenantAdminServer) Health(
 		return resp, nil
 	}
 
-	if !t.sqlServer.acceptingClients.Get() {
+	if !t.sqlServer.isReady.Get() {
 		return nil, status.Errorf(codes.Unavailable, "node is not accepting SQL clients")
 	}
 
diff --git a/pkg/sql/pgwire/server.go b/pkg/sql/pgwire/server.go
index cda8fa1f1aff..32e0baba792a 100644
--- a/pkg/sql/pgwire/server.go
+++ b/pkg/sql/pgwire/server.go
@@ -385,8 +385,8 @@ func (s *Server) Metrics() (res []interface{}) {
 	}
 }
 
-// Drain prevents new connections from being served and waits for drainWait for
-// open connections to terminate before canceling them.
+// Drain prevents new connections from being served and waits the duration of
+// queryWait for open connections to terminate before canceling them.
 // An error will be returned when connections that have been canceled have not
 // responded to this cancellation and closed themselves in time. The server
 // will remain in draining state, though open connections may continue to
@@ -400,9 +400,9 @@ func (s *Server) Metrics() (res []interface{}) {
 // been done by the time this call returns. See the explanation in
 // pkg/server/drain.go for details.
 func (s *Server) Drain(
-	ctx context.Context, drainWait time.Duration, reporter func(int, redact.SafeString),
+	ctx context.Context, queryWait time.Duration, reporter func(int, redact.SafeString),
 ) error {
-	return s.drainImpl(ctx, drainWait, cancelMaxWait, reporter)
+	return s.drainImpl(ctx, queryWait, cancelMaxWait, reporter)
 }
 
 // Undrain switches the server back to the normal mode of operation in which
@@ -489,7 +489,7 @@ func (s *Server) drainImpl(
 		}
 	}()
 
-	// Wait for all connections to finish up to drainWait.
+	// Wait for connections to finish up their queries for the duration of queryWait.
 	select {
 	case <-time.After(queryWait):
 		log.Ops.Warningf(ctx, "canceling all sessions after waiting %s", queryWait)