diff --git a/docs/root/intro/arch_overview/operations/draining.rst b/docs/root/intro/arch_overview/operations/draining.rst index 0a0932e57a92..18003197c844 100644 --- a/docs/root/intro/arch_overview/operations/draining.rst +++ b/docs/root/intro/arch_overview/operations/draining.rst @@ -3,16 +3,42 @@ Draining ======== -Draining is the process by which Envoy attempts to gracefully shed connections in response to -various events. Draining occurs at the following times: +In a few different scenarios, Envoy will attempt to gracefully shed connections. For instance, +during server shutdown, existing requests can be discouraged and listeners set to stop accepting, +to reduce the number of open connections when the server shuts down. Draining behaviour is defined +by the server options in addition to individual listener configs. +Draining occurs at the following times: + +* The server is being :ref:`hot restarted `. +* The server begins the graceful drain sequence via the :ref:`drain_listeners?graceful + ` admin endpoint. * The server has been manually health check failed via the :ref:`healthcheck/fail ` admin endpoint. See the :ref:`health check filter ` architecture overview for more information. -* The server is being :ref:`hot restarted `. * Individual listeners are being modified or removed via :ref:`LDS `. +By default, the Envoy server will close listeners immediately on server shutdown. To drain listeners +for some duration of time prior to server shutdown, use :ref:`drain_listeners ` +before shutting down the server. The listeners will be directly stopped without any graceful draining behaviour, +and cease accepting new connections immediately. + +To add a graceful drain period prior to listeners being closed, use the query parameter +:ref:`drain_listeners?graceful `. By default, Envoy +will discourage requests for some period of time (as determined by :option:`--drain-time-s`). +The behaviour of request discouraging is determined by the drain manager. 
+ +Note that although draining is a per-listener concept, it must be supported at the network filter +level. Currently the only filters that support graceful draining are +:ref:`Redis `, +:ref:`Mongo `, +and :ref:`HTTP connection manager `. + +By default, the :ref:`HTTP connection manager ` filter will +add "Connection: close" to HTTP1 responses, send HTTP2 GOAWAY, and terminate connections +on request completion (after the delayed close period). + Each :ref:`configured listener ` has a :ref:`drain_type ` setting which controls when draining takes place. The currently supported values are: @@ -27,13 +53,3 @@ modify_only It may be desirable to set *modify_only* on egress listeners so they only drain during modifications while relying on ingress listener draining to perform full server draining when attempting to do a controlled shutdown. - -Note that although draining is a per-listener concept, it must be supported at the network filter -level. Currently the only filters that support graceful draining are -:ref:`HTTP connection manager `, -:ref:`Redis `, and -:ref:`Mongo `. - -Listeners can also be stopped via :ref:`drain_listeners `. In this case, -they are directly stopped (without going through the actual draining process) on worker threads, -so that they will not accept any new requests. diff --git a/docs/root/operations/admin.rst b/docs/root/operations/admin.rst index c4a3cd81ee13..b90a1461f415 100644 --- a/docs/root/operations/admin.rst +++ b/docs/root/operations/admin.rst @@ -258,6 +258,12 @@ modify different aspects of the server: :ref:`Listener ` is used to determine whether a listener is inbound or outbound. + .. http:post:: /drain_listeners?graceful + + When draining listeners, enter a graceful drain period prior to closing listeners. + This behaviour and its duration are configurable via server options or CLI + (:option:`--drain-time-s` and :option:`--drain-strategy`). + .. attention:: This operation directly stops the matched listeners on workers. 
Once listeners in a given diff --git a/include/envoy/server/drain_manager.h b/include/envoy/server/drain_manager.h index 0f29b0cd3eed..49ecc194166a 100644 --- a/include/envoy/server/drain_manager.h +++ b/include/envoy/server/drain_manager.h @@ -21,6 +21,11 @@ class DrainManager : public Network::DrainDecision { */ virtual void startDrainSequence(std::function drain_complete_cb) PURE; + /** + * @return whether the drain sequence has started. + */ + virtual bool draining() const PURE; + /** * Invoked in the newly launched primary process to begin the parent shutdown sequence. At the end * of the sequence the previous primary process will be terminated. diff --git a/source/server/admin/listeners_handler.cc b/source/server/admin/listeners_handler.cc index 3d813ad4b4c8..93407d9eb6cc 100644 --- a/source/server/admin/listeners_handler.cc +++ b/source/server/admin/listeners_handler.cc @@ -16,10 +16,24 @@ ListenersHandler::ListenersHandler(Server::Instance& server) : HandlerContextBas Http::Code ListenersHandler::handlerDrainListeners(absl::string_view url, Http::ResponseHeaderMap&, Buffer::Instance& response, AdminStream&) { const Http::Utility::QueryParams params = Http::Utility::parseQueryString(url); + ListenerManager::StopListenersType stop_listeners_type = params.find("inboundonly") != params.end() ? ListenerManager::StopListenersType::InboundOnly : ListenerManager::StopListenersType::All; - server_.listenerManager().stopListeners(stop_listeners_type); + + const bool graceful = params.find("graceful") != params.end(); + if (graceful) { + // Ignore calls to /drain_listeners?graceful if the drain sequence has + // already started. 
+ if (!server_.drainManager().draining()) { + server_.drainManager().startDrainSequence([this, stop_listeners_type]() { + server_.listenerManager().stopListeners(stop_listeners_type); + }); + } + } else { + server_.listenerManager().stopListeners(stop_listeners_type); + } + response.add("OK\n"); return Http::Code::OK; } diff --git a/source/server/drain_manager_impl.h b/source/server/drain_manager_impl.h index 38a02465b761..c8056f22396c 100644 --- a/source/server/drain_manager_impl.h +++ b/source/server/drain_manager_impl.h @@ -28,6 +28,7 @@ class DrainManagerImpl : Logger::Loggable, public DrainManager // Server::DrainManager void startDrainSequence(std::function drain_complete_cb) override; + bool draining() const override { return draining_; } void startParentShutdownSequence() override; private: diff --git a/test/integration/drain_close_integration_test.cc b/test/integration/drain_close_integration_test.cc index cbe58e973ecd..aa0afd8d141b 100644 --- a/test/integration/drain_close_integration_test.cc +++ b/test/integration/drain_close_integration_test.cc @@ -75,6 +75,106 @@ TEST_P(DrainCloseIntegrationTest, DrainCloseImmediate) { TEST_P(DrainCloseIntegrationTest, AdminDrain) { testAdminDrain(downstreamProtocol()); } +TEST_P(DrainCloseIntegrationTest, AdminGracefulDrain) { + drain_strategy_ = Server::DrainStrategy::Immediate; + drain_time_ = std::chrono::seconds(999); + initialize(); + fake_upstreams_[0]->set_allow_unexpected_disconnects(true); + uint32_t http_port = lookupPort("http"); + codec_client_ = makeHttpConnection(http_port); + + auto response = codec_client_->makeHeaderOnlyRequest(default_request_headers_); + waitForNextUpstreamRequest(0); + upstream_request_->encodeHeaders(default_response_headers_, true); + response->waitForEndStream(); + ASSERT_TRUE(response->complete()); + EXPECT_THAT(response->headers(), Http::HttpStatusIs("200")); + // The request is completed but the connection remains open. 
+ EXPECT_TRUE(codec_client_->connected()); + + // Invoke /drain_listeners with graceful drain + BufferingStreamDecoderPtr admin_response = IntegrationUtil::makeSingleRequest( + lookupPort("admin"), "POST", "/drain_listeners?graceful", "", downstreamProtocol(), version_); + EXPECT_EQ(admin_response->headers().Status()->value().getStringView(), "200"); + + // With a 999s graceful drain period, the listener should still be open. + EXPECT_EQ(test_server_->counter("listener_manager.listener_stopped")->value(), 0); + + response = codec_client_->makeHeaderOnlyRequest(default_request_headers_); + waitForNextUpstreamRequest(0); + upstream_request_->encodeHeaders(default_response_headers_, true); + response->waitForEndStream(); + ASSERT_TRUE(response->complete()); + EXPECT_THAT(response->headers(), Http::HttpStatusIs("200")); + + // Connections will terminate on request complete + ASSERT_TRUE(codec_client_->waitForDisconnect()); + if (downstream_protocol_ == Http::CodecClient::Type::HTTP2) { + EXPECT_TRUE(codec_client_->sawGoAway()); + } else { + EXPECT_EQ("close", response->headers().getConnectionValue()); + } + + // New connections can still be made. + auto second_codec_client_ = makeRawHttpConnection(makeClientConnection(http_port)); + EXPECT_TRUE(second_codec_client_->connected()); + + // Invoke /drain_listeners and shut down listeners. 
+ second_codec_client_->rawConnection().close(Network::ConnectionCloseType::NoFlush); + admin_response = IntegrationUtil::makeSingleRequest( + lookupPort("admin"), "POST", "/drain_listeners", "", downstreamProtocol(), version_); + EXPECT_EQ(admin_response->headers().Status()->value().getStringView(), "200"); + + test_server_->waitForCounterEq("listener_manager.listener_stopped", 1); + EXPECT_NO_THROW(Network::TcpListenSocket( + Network::Utility::getAddressWithPort(*Network::Test::getCanonicalLoopbackAddress(version_), + http_port), + nullptr, true)); +} + +TEST_P(DrainCloseIntegrationTest, RepeatedAdminGracefulDrain) { + // Use the default gradual probabilistic DrainStrategy so drainClose() + // behaviour isn't conflated with whether the drain sequence has started. + drain_time_ = std::chrono::seconds(999); + initialize(); + fake_upstreams_[0]->set_allow_unexpected_disconnects(true); + uint32_t http_port = lookupPort("http"); + codec_client_ = makeHttpConnection(http_port); + + auto response = codec_client_->makeHeaderOnlyRequest(default_request_headers_); + waitForNextUpstreamRequest(0); + upstream_request_->encodeHeaders(default_response_headers_, true); + response->waitForEndStream(); + + // Invoke /drain_listeners with graceful drain + BufferingStreamDecoderPtr admin_response = IntegrationUtil::makeSingleRequest( + lookupPort("admin"), "POST", "/drain_listeners?graceful", "", downstreamProtocol(), version_); + EXPECT_EQ(admin_response->headers().Status()->value().getStringView(), "200"); + EXPECT_EQ(test_server_->counter("listener_manager.listener_stopped")->value(), 0); + + admin_response = IntegrationUtil::makeSingleRequest( + lookupPort("admin"), "POST", "/drain_listeners?graceful", "", downstreamProtocol(), version_); + EXPECT_EQ(admin_response->headers().Status()->value().getStringView(), "200"); + EXPECT_EQ(test_server_->counter("listener_manager.listener_stopped")->value(), 0); + + response = codec_client_->makeHeaderOnlyRequest(default_request_headers_); 
+ waitForNextUpstreamRequest(0); + upstream_request_->encodeHeaders(default_response_headers_, true); + response->waitForEndStream(); + ASSERT_TRUE(response->complete()); + EXPECT_THAT(response->headers(), Http::HttpStatusIs("200")); + + admin_response = IntegrationUtil::makeSingleRequest( + lookupPort("admin"), "POST", "/drain_listeners", "", downstreamProtocol(), version_); + EXPECT_EQ(admin_response->headers().Status()->value().getStringView(), "200"); + + test_server_->waitForCounterEq("listener_manager.listener_stopped", 1); + EXPECT_NO_THROW(Network::TcpListenSocket( + Network::Utility::getAddressWithPort(*Network::Test::getCanonicalLoopbackAddress(version_), + http_port), + nullptr, true)); +} + INSTANTIATE_TEST_SUITE_P(Protocols, DrainCloseIntegrationTest, testing::ValuesIn(HttpProtocolIntegrationTest::getProtocolTestParams( {Http::CodecClient::Type::HTTP1, Http::CodecClient::Type::HTTP2}, diff --git a/test/mocks/server/mocks.h b/test/mocks/server/mocks.h index 34ffef72e615..6e5060014f72 100644 --- a/test/mocks/server/mocks.h +++ b/test/mocks/server/mocks.h @@ -192,6 +192,7 @@ class MockDrainManager : public DrainManager { // Server::DrainManager MOCK_METHOD(bool, drainClose, (), (const)); + MOCK_METHOD(bool, draining, (), (const)); MOCK_METHOD(void, startDrainSequence, (std::function completion)); MOCK_METHOD(void, startParentShutdownSequence, ()); diff --git a/test/server/drain_manager_impl_test.cc b/test/server/drain_manager_impl_test.cc index be09ee0ec7fb..9afeba1b7955 100644 --- a/test/server/drain_manager_impl_test.cc +++ b/test/server/drain_manager_impl_test.cc @@ -126,7 +126,10 @@ TEST_P(DrainManagerImplTest, DrainDeadlineProbability) { EXPECT_TRUE(drain_manager.drainClose()); EXPECT_CALL(server_, healthCheckFailed()).WillRepeatedly(Return(false)); EXPECT_FALSE(drain_manager.drainClose()); + EXPECT_FALSE(drain_manager.draining()); + drain_manager.startDrainSequence([] {}); + EXPECT_TRUE(drain_manager.draining()); if (drain_gradually) { // random() 
should be called when elapsed time < drain timeout