From 82bdb854201137a3648635b29e31ba4790c08d2a Mon Sep 17 00:00:00 2001 From: Matt Klein Date: Thu, 29 Aug 2019 15:49:11 -0700 Subject: [PATCH 1/6] per-worker listener and watchdog stats This PR does a few things: 1) Adds per-worker listener stats, useful for viewing worker connection imbalance. 2) Adds per-worker watchdog miss stats, useful for viewing per worker event loop latency. 3) Misc connection handling cleanups. Part of https://github.com/envoyproxy/envoy/issues/4602 Signed-off-by: Matt Klein --- api/envoy/config/bootstrap/v2/bootstrap.proto | 5 +- docs/root/configuration/listeners/stats.rst | 20 ++++++ docs/root/intro/version_history.rst | 3 + docs/root/operations/performance.rst | 25 ++++++- include/envoy/network/connection_handler.h | 17 ++--- include/envoy/server/guarddog.h | 4 +- include/envoy/server/worker.h | 5 +- source/common/common/logger.h | 1 + source/common/event/dispatched_thread.cc | 41 ------------ source/common/event/dispatched_thread.h | 67 ------------------- .../quiche/active_quic_listener.cc | 17 ++--- .../quiche/active_quic_listener.h | 19 +++--- .../server/active_raw_udp_listener_config.cc | 9 +-- .../server/active_raw_udp_listener_config.h | 2 +- source/server/config_validation/server.h | 2 +- source/server/connection_handler_impl.cc | 63 +++++++++-------- source/server/connection_handler_impl.h | 50 +++++++------- source/server/guarddog_impl.cc | 57 ++++++++++------ source/server/guarddog_impl.h | 14 +++- source/server/listener_manager_impl.cc | 3 +- source/server/server.cc | 5 +- source/server/worker_impl.cc | 24 ++++--- source/server/worker_impl.h | 7 +- .../event/dispatched_thread_impl_test.cc | 51 -------------- .../quiche/active_quic_listener_test.cc | 6 +- .../quiche/envoy_quic_dispatcher_test.cc | 2 +- test/integration/fake_upstream.cc | 2 +- test/integration/integration_test.cc | 19 ++++++ test/mocks/network/mocks.h | 1 + test/mocks/server/mocks.cc | 2 +- test/mocks/server/mocks.h | 7 +- 
test/server/connection_handler_test.cc | 13 +++- test/server/guarddog_impl_test.cc | 62 +++++++++++------ test/server/worker_impl_test.cc | 2 +- 34 files changed, 308 insertions(+), 319 deletions(-) delete mode 100644 source/common/event/dispatched_thread.cc delete mode 100644 source/common/event/dispatched_thread.h delete mode 100644 test/common/event/dispatched_thread_impl_test.cc diff --git a/api/envoy/config/bootstrap/v2/bootstrap.proto b/api/envoy/config/bootstrap/v2/bootstrap.proto index b8f17de61870..59e577ceadcd 100644 --- a/api/envoy/config/bootstrap/v2/bootstrap.proto +++ b/api/envoy/config/bootstrap/v2/bootstrap.proto @@ -202,13 +202,14 @@ message ClusterManager { // Envoy process watchdog configuration. When configured, this monitors for // nonresponsive threads and kills the process after the configured thresholds. +// See the :ref:`watchdog documentation ` for more information. message Watchdog { // The duration after which Envoy counts a nonresponsive thread in the - // *server.watchdog_miss* statistic. If not specified the default is 200ms. + // *watchdog_miss* statistic. If not specified the default is 200ms. google.protobuf.Duration miss_timeout = 1; // The duration after which Envoy counts a nonresponsive thread in the - // *server.watchdog_mega_miss* statistic. If not specified the default is + // *watchdog_mega_miss* statistic. If not specified the default is // 1000ms. google.protobuf.Duration megamiss_timeout = 2; diff --git a/docs/root/configuration/listeners/stats.rst b/docs/root/configuration/listeners/stats.rst index 03eea44cb751..4615698df1e5 100644 --- a/docs/root/configuration/listeners/stats.rst +++ b/docs/root/configuration/listeners/stats.rst @@ -32,6 +32,26 @@ Every listener has a statistics tree rooted at *listener.
.* with the fo ssl.sigalgs., Counter, Total successful TLS connections that used signature algorithm ssl.versions., Counter, Total successful TLS connections that used protocol version +.. _config_listener_stats_per_handler: + +Per-handler Listener Stats +-------------------------- + +Every listener additionally has a statistics tree rooted at *listener.
..* which +contains *per-handler* statistics. As described in the +:ref:`threading model ` documentation, Envoy has a threading model which +includes the *main thread* as well as a number of *worker threads* which are controlled by the +:option:`--concurrency` option. Along these lines, ** is equal to *main_thread*, +*worker_0*, *worker_1*, etc. These statistics can be used to look for per-handler/worker imbalance +on either accepted or active connections. + +.. csv-table:: + :header: Name, Type, Description + :widths: 1, 1, 2 + + downstream_cx_total, Counter, Total connections on this handler. + downstream_cx_active, Gauge, Total active connections on this handler. + Listener manager ---------------- diff --git a/docs/root/intro/version_history.rst b/docs/root/intro/version_history.rst index a80968050e6c..157bd23f2da8 100644 --- a/docs/root/intro/version_history.rst +++ b/docs/root/intro/version_history.rst @@ -54,6 +54,9 @@ Version history * router check tool: add deprecated field check. * router check tool: add flag for only printing results of failed tests. * server: added a post initialization lifecycle event, in addition to the existing startup and shutdown events. +* server: added :ref:`per-handler listener stats ` and + :ref:`per-worker watchdog stats ` to help diagnose event + loop imbalance and general performance issues. * thrift_proxy: fix crashing bug on invalid transport/protocol framing * tls: added verification of IP address SAN fields in certificates against configured SANs in the * tracing: added support to the Zipkin reporter for sending list of spans as Zipkin JSON v2 and protobuf message over HTTP. diff --git a/docs/root/operations/performance.rst b/docs/root/operations/performance.rst index d7066374f3ed..555e7a03b5eb 100644 --- a/docs/root/operations/performance.rst +++ b/docs/root/operations/performance.rst @@ -34,8 +34,8 @@ to true. the wire individually because the statsd protocol doesn't have any way to represent a histogram summary. 
Be aware that this can be a very large volume of data. -Statistics ---------- +Event loop statistics +--------------------- The event dispatcher for the main thread has a statistics tree rooted at *server.dispatcher.*, and the event dispatcher for each worker thread has a statistics tree rooted at @@ -49,3 +49,24 @@ the event dispatcher for each worker thread has a statistics tree rooted at poll_delay_us, Histogram, Polling delays in microseconds Note that any auxiliary threads are not included here. + +.. _operations_performance_watchdog: + +Watchdog +-------- + +In addition to event loop statistics, Envoy also includes a configurable +:ref:`watchdog ` system that can increment +statistics when Envoy is not responsive and optionally kill the server. The statistics are useful +for understanding at a high level whether Envoy's event loop is not responsive either because it is +doing too much work, blocking, or not being scheduled by the OS. + +The watchdog emits statistics in both the *server.* and *server..* trees. +** is equal to *main_thread*, *worker_0*, *worker_1*, etc. + +.. csv-table:: + :header: Name, Type, Description + :widths: 1, 1, 2 + + watchdog_miss, Counter, Number of standard misses + watchdog_mega_miss, Counter, Number of mega misses diff --git a/include/envoy/network/connection_handler.h b/include/envoy/network/connection_handler.h index 3ea5df65d84c..da878ca96361 100644 --- a/include/envoy/network/connection_handler.h +++ b/include/envoy/network/connection_handler.h @@ -9,8 +9,6 @@ #include "envoy/network/listener.h" #include "envoy/ssl/context.h" -#include "spdlog/spdlog.h" - namespace Envoy { namespace Network { @@ -84,6 +82,11 @@ class ConnectionHandler { */ virtual void enableListeners() PURE; + /** + * @return the stat prefix used for per-handler stats. + */ + virtual const std::string& statPrefix() PURE; + /** * Used by ConnectionHandler to manage listeners. 
*/ @@ -95,10 +98,12 @@ class ConnectionHandler { * @return the tag value as configured. */ virtual uint64_t listenerTag() PURE; + /** * @return the actual Listener object. */ virtual Listener* listener() PURE; + /** * Destroy the actual Listener it wraps. */ @@ -111,8 +116,7 @@ class ConnectionHandler { using ConnectionHandlerPtr = std::unique_ptr; /** - * A registered factory interface to create different kinds of - * ActiveUdpListener. + * A registered factory interface to create different kinds of ActiveUdpListener. */ class ActiveUdpListenerFactory { public: @@ -123,16 +127,13 @@ class ActiveUdpListenerFactory { * according to given config. * @param parent is the owner of the created ActiveListener objects. * @param dispatcher is used to create actual UDP listener. - * @param logger might not need to be passed in. - * TODO(danzh): investigate if possible to use statically defined logger in ActiveUdpListener - * implementation instead. * @param config provides information needed to create ActiveUdpListener and * UdpListener objects. * @return the ActiveUdpListener created. */ virtual ConnectionHandler::ActiveListenerPtr createActiveUdpListener(ConnectionHandler& parent, Event::Dispatcher& disptacher, - spdlog::logger& logger, Network::ListenerConfig& config) const PURE; + Network::ListenerConfig& config) const PURE; }; using ActiveUdpListenerFactoryPtr = std::unique_ptr; diff --git a/include/envoy/server/guarddog.h b/include/envoy/server/guarddog.h index 4386aa7f9051..08b8f53646a0 100644 --- a/include/envoy/server/guarddog.h +++ b/include/envoy/server/guarddog.h @@ -27,8 +27,10 @@ class GuardDog { * stopWatching() method to remove it from the list of watched objects. * * @param thread_id a Thread::ThreadId containing the system thread id + * @param thread_name supplies the name of the thread which is used for per-thread miss stats. 
*/ - virtual WatchDogSharedPtr createWatchDog(Thread::ThreadId thread_id) PURE; + virtual WatchDogSharedPtr createWatchDog(Thread::ThreadId thread_id, + const std::string& thread_name) PURE; /** * Tell the GuardDog to forget about this WatchDog. diff --git a/include/envoy/server/worker.h b/include/envoy/server/worker.h index f412dc922235..887e39b2e111 100644 --- a/include/envoy/server/worker.h +++ b/include/envoy/server/worker.h @@ -91,9 +91,12 @@ class WorkerFactory { virtual ~WorkerFactory() = default; /** + * @param overload_manager supplies the server's overload manager. + * @param worker_name supplies the name of the worker, used for per-worker stats. * @return WorkerPtr a new worker. */ - virtual WorkerPtr createWorker(OverloadManager& overload_manager) PURE; + virtual WorkerPtr createWorker(OverloadManager& overload_manager, + const std::string& worker_name) PURE; }; } // namespace Server diff --git a/source/common/common/logger.h b/source/common/common/logger.h index 22a0255a0e9f..58f06b447f0c 100644 --- a/source/common/common/logger.h +++ b/source/common/common/logger.h @@ -29,6 +29,7 @@ namespace Logger { FUNCTION(client) \ FUNCTION(config) \ FUNCTION(connection) \ + FUNCTION(conn_handler) \ FUNCTION(dubbo) \ FUNCTION(file) \ FUNCTION(filter) \ diff --git a/source/common/event/dispatched_thread.cc b/source/common/event/dispatched_thread.cc deleted file mode 100644 index c215ff56cbca..000000000000 --- a/source/common/event/dispatched_thread.cc +++ /dev/null @@ -1,41 +0,0 @@ -#include "common/event/dispatched_thread.h" - -#include -#include - -#include "envoy/common/time.h" -#include "envoy/event/dispatcher.h" -#include "envoy/server/configuration.h" -#include "envoy/thread/thread.h" - -#include "spdlog/spdlog.h" - -namespace Envoy { -namespace Event { - -void DispatchedThreadImpl::start(Server::GuardDog& guard_dog) { - thread_ = - api_.threadFactory().createThread([this, &guard_dog]() -> void { threadRoutine(guard_dog); }); -} - -void 
DispatchedThreadImpl::exit() { - if (thread_) { - dispatcher_->exit(); - thread_->join(); - } -} - -void DispatchedThreadImpl::threadRoutine(Server::GuardDog& guard_dog) { - ENVOY_LOG(debug, "dispatched thread entering dispatch loop"); - auto watchdog = guard_dog.createWatchDog(api_.threadFactory().currentThreadId()); - watchdog->startWatchdog(*dispatcher_); - dispatcher_->run(Dispatcher::RunType::Block); - ENVOY_LOG(debug, "dispatched thread exited dispatch loop"); - guard_dog.stopWatching(watchdog); - - watchdog.reset(); - dispatcher_.reset(); -} - -} // namespace Event -} // namespace Envoy diff --git a/source/common/event/dispatched_thread.h b/source/common/event/dispatched_thread.h deleted file mode 100644 index 9dc5db53d80d..000000000000 --- a/source/common/event/dispatched_thread.h +++ /dev/null @@ -1,67 +0,0 @@ -#pragma once - -#include -#include -#include - -#include "envoy/api/api.h" -#include "envoy/event/dispatcher.h" -#include "envoy/event/timer.h" -#include "envoy/server/configuration.h" -#include "envoy/server/guarddog.h" - -#include "common/common/thread.h" -#include "common/event/dispatcher_impl.h" - -namespace Envoy { -namespace Event { - -/** - * Generic dispatched thread. - * - * This provides basic functionality for a thread which has the "Dispatched - * Nature" (runs an event loop) but does not want to do listening and accept new - * connections like regular Worker threads, or any of the other functionality - * specific to "Worker" threads. This is particularly useful if you need a - * special purpose thread that will issue or receive gRPC calls. - * - * These features are set up: - * 1) Dispatcher support: - * open connections, open files, callback posting, timers, listen - * 2) GuardDog deadlock monitoring - * - * These features are not: - * 1) Thread local storage (we don't want runOnAllThreads callbacks to run on - * this thread). 
- * 2) ConnectionHandler and listeners - * - * TODO(dnoe): Worker should probably be refactored to leverage this. - */ -class DispatchedThreadImpl : Logger::Loggable { -public: - DispatchedThreadImpl(Api::Api& api) : api_(api), dispatcher_(api_.allocateDispatcher()) {} - - /** - * Start the thread. - * - * @param guard_dog GuardDog instance to register with. - */ - void start(Envoy::Server::GuardDog& guard_dog); - - Dispatcher& dispatcher() { return *dispatcher_; } - - /** - * Exit the dispatched thread. Will block until the thread joins. - */ - void exit(); - -private: - void threadRoutine(Envoy::Server::GuardDog& guard_dog); - - Api::Api& api_; - DispatcherPtr dispatcher_; - Thread::ThreadPtr thread_; -}; - -} // namespace Event -} // namespace Envoy diff --git a/source/extensions/quic_listeners/quiche/active_quic_listener.cc b/source/extensions/quic_listeners/quiche/active_quic_listener.cc index 5f6b5ff71a43..6272cc98df15 100644 --- a/source/extensions/quic_listeners/quiche/active_quic_listener.cc +++ b/source/extensions/quic_listeners/quiche/active_quic_listener.cc @@ -11,29 +11,30 @@ namespace Envoy { namespace Quic { ActiveQuicListener::ActiveQuicListener(Event::Dispatcher& dispatcher, - Network::ConnectionHandler& parent, spdlog::logger& logger, + Network::ConnectionHandler& parent, Network::ListenerConfig& listener_config, const quic::QuicConfig& quic_config) : ActiveQuicListener(dispatcher, parent, - dispatcher.createUdpListener(listener_config.socket(), *this), logger, + dispatcher.createUdpListener(listener_config.socket(), *this), listener_config, quic_config) {} ActiveQuicListener::ActiveQuicListener(Event::Dispatcher& dispatcher, Network::ConnectionHandler& parent, - Network::UdpListenerPtr&& listener, spdlog::logger& logger, + Network::UdpListenerPtr&& listener, Network::ListenerConfig& listener_config, const quic::QuicConfig& quic_config) : ActiveQuicListener(dispatcher, parent, std::make_unique(*listener), - std::move(listener), logger, 
listener_config, quic_config) {} + std::move(listener), listener_config, quic_config) {} ActiveQuicListener::ActiveQuicListener(Event::Dispatcher& dispatcher, Network::ConnectionHandler& parent, std::unique_ptr writer, - Network::UdpListenerPtr&& listener, spdlog::logger& logger, + Network::UdpListenerPtr&& listener, Network::ListenerConfig& listener_config, const quic::QuicConfig& quic_config) - : Server::ConnectionHandlerImpl::ActiveListenerImplBase(std::move(listener), listener_config), - logger_(logger), dispatcher_(dispatcher), version_manager_(quic::CurrentSupportedVersions()) { + : Server::ConnectionHandlerImpl::ActiveListenerImplBase(parent, std::move(listener), + listener_config), + dispatcher_(dispatcher), version_manager_(quic::CurrentSupportedVersions()) { quic::QuicRandom* const random = quic::QuicRandom::GetInstance(); random->RandBytes(random_seed_, sizeof(random_seed_)); crypto_config_ = std::make_unique( @@ -51,7 +52,7 @@ ActiveQuicListener::ActiveQuicListener(Event::Dispatcher& dispatcher, } void ActiveQuicListener::onListenerShutdown() { - ENVOY_LOG_TO_LOGGER(logger_, info, "Quic listener {} shutdown.", config_.name()); + ENVOY_LOG(info, "Quic listener {} shutdown.", config_.name()); quic_dispatcher_->Shutdown(); } diff --git a/source/extensions/quic_listeners/quiche/active_quic_listener.h b/source/extensions/quic_listeners/quiche/active_quic_listener.h index bb327d12fd60..69615a8c83be 100644 --- a/source/extensions/quic_listeners/quiche/active_quic_listener.h +++ b/source/extensions/quic_listeners/quiche/active_quic_listener.h @@ -22,15 +22,15 @@ class ActiveQuicListener : public Network::UdpListenerCallbacks, // filter. // TODO(danzh): clean up meaningless inheritance. 
public Network::UdpListenerFilterManager, - public Network::UdpReadFilterCallbacks { + public Network::UdpReadFilterCallbacks, + Logger::Loggable { public: ActiveQuicListener(Event::Dispatcher& dispatcher, Network::ConnectionHandler& parent, - spdlog::logger& logger, Network::ListenerConfig& listener_config, - const quic::QuicConfig& quic_config); + Network::ListenerConfig& listener_config, const quic::QuicConfig& quic_config); ActiveQuicListener(Event::Dispatcher& dispatcher, Network::ConnectionHandler& parent, - Network::UdpListenerPtr&& listener, spdlog::logger& logger, - Network::ListenerConfig& listener_config, const quic::QuicConfig& quic_config); + Network::UdpListenerPtr&& listener, Network::ListenerConfig& listener_config, + const quic::QuicConfig& quic_config); // TODO(#7465): Make this a callback. void onListenerShutdown(); @@ -57,12 +57,11 @@ class ActiveQuicListener : public Network::UdpListenerCallbacks, ActiveQuicListener(Event::Dispatcher& dispatcher, Network::ConnectionHandler& parent, std::unique_ptr writer, - Network::UdpListenerPtr&& listener, spdlog::logger& logger, - Network::ListenerConfig& listener_config, const quic::QuicConfig& quic_config); + Network::UdpListenerPtr&& listener, Network::ListenerConfig& listener_config, + const quic::QuicConfig& quic_config); uint8_t random_seed_[16]; std::unique_ptr crypto_config_; - spdlog::logger& logger_; Event::Dispatcher& dispatcher_; quic::QuicVersionManager version_manager_; std::unique_ptr quic_dispatcher_; @@ -93,8 +92,8 @@ class ActiveQuicListenerFactory : public Network::ActiveUdpListenerFactory { Network::ConnectionHandler::ActiveListenerPtr createActiveUdpListener(Network::ConnectionHandler& parent, Event::Dispatcher& disptacher, - spdlog::logger& logger, Network::ListenerConfig& config) const override { - return std::make_unique(disptacher, parent, logger, config, quic_config_); + Network::ListenerConfig& config) const override { + return std::make_unique(disptacher, parent, config, 
quic_config_); } private: diff --git a/source/server/active_raw_udp_listener_config.cc b/source/server/active_raw_udp_listener_config.cc index 0d6cbb196cf7..8eea3f4742e9 100644 --- a/source/server/active_raw_udp_listener_config.cc +++ b/source/server/active_raw_udp_listener_config.cc @@ -6,10 +6,11 @@ namespace Envoy { namespace Server { -Network::ConnectionHandler::ActiveListenerPtr ActiveRawUdpListenerFactory::createActiveUdpListener( - Network::ConnectionHandler& /*parent*/, Event::Dispatcher& dispatcher, - spdlog::logger& /*logger*/, Network::ListenerConfig& config) const { - return std::make_unique(dispatcher, config); +Network::ConnectionHandler::ActiveListenerPtr +ActiveRawUdpListenerFactory::createActiveUdpListener(Network::ConnectionHandler& parent, + Event::Dispatcher& dispatcher, + Network::ListenerConfig& config) const { + return std::make_unique(parent, dispatcher, config); } ProtobufTypes::MessagePtr ActiveRawUdpListenerConfigFactory::createEmptyConfigProto() { diff --git a/source/server/active_raw_udp_listener_config.h b/source/server/active_raw_udp_listener_config.h index b58554c9c3fb..de37a5cbb2c8 100644 --- a/source/server/active_raw_udp_listener_config.h +++ b/source/server/active_raw_udp_listener_config.h @@ -11,7 +11,7 @@ class ActiveRawUdpListenerFactory : public Network::ActiveUdpListenerFactory { public: Network::ConnectionHandler::ActiveListenerPtr createActiveUdpListener(Network::ConnectionHandler& parent, Event::Dispatcher& disptacher, - spdlog::logger& logger, Network::ListenerConfig& config) const override; + Network::ListenerConfig& config) const override; }; // This class uses a protobuf config to create a UDP listener factory which diff --git a/source/server/config_validation/server.h b/source/server/config_validation/server.h index 8a59c4a046ff..d1bf74408a4a 100644 --- a/source/server/config_validation/server.h +++ b/source/server/config_validation/server.h @@ -145,7 +145,7 @@ class ValidationInstance : Logger::Loggable, uint64_t 
nextListenerTag() override { return 0; } // Server::WorkerFactory - WorkerPtr createWorker(OverloadManager&) override { + WorkerPtr createWorker(OverloadManager&, const std::string&) override { // Returned workers are not currently used so we can return nothing here safely vs. a // validation mock. return nullptr; diff --git a/source/server/connection_handler_impl.cc b/source/server/connection_handler_impl.cc index 398cb88fd71c..3ce3a0e74329 100644 --- a/source/server/connection_handler_impl.cc +++ b/source/server/connection_handler_impl.cc @@ -14,8 +14,10 @@ namespace Envoy { namespace Server { -ConnectionHandlerImpl::ConnectionHandlerImpl(spdlog::logger& logger, Event::Dispatcher& dispatcher) - : logger_(logger), dispatcher_(dispatcher), disable_listeners_(false) {} +ConnectionHandlerImpl::ConnectionHandlerImpl(Event::Dispatcher& dispatcher, + const std::string& per_handler_stat_prefix) + : dispatcher_(dispatcher), per_handler_stat_prefix_(per_handler_stat_prefix + "."), + disable_listeners_(false) {} void ConnectionHandlerImpl::incNumConnections() { ++num_connections_; } @@ -33,8 +35,7 @@ void ConnectionHandlerImpl::addListener(Network::ListenerConfig& config) { } else { ASSERT(socket_type == Network::Address::SocketType::Datagram, "Only datagram/stream listener supported"); - listener = - config.udpListenerFactory()->createActiveUdpListener(*this, dispatcher_, logger_, config); + listener = config.udpListenerFactory()->createActiveUdpListener(*this, dispatcher_, config); } if (disable_listeners_) { @@ -82,8 +83,7 @@ void ConnectionHandlerImpl::enableListeners() { } void ConnectionHandlerImpl::ActiveTcpListener::removeConnection(ActiveConnection& connection) { - ENVOY_CONN_LOG_TO_LOGGER(parent_.logger_, debug, "adding to cleanup list", - *connection.connection_); + ENVOY_CONN_LOG(debug, "adding to cleanup list", *connection.connection_); ActiveConnectionPtr removed = connection.removeFromList(connections_); parent_.dispatcher_.deferredDelete(std::move(removed)); 
ASSERT(parent_.num_connections_ > 0); @@ -91,8 +91,15 @@ void ConnectionHandlerImpl::ActiveTcpListener::removeConnection(ActiveConnection } ConnectionHandlerImpl::ActiveListenerImplBase::ActiveListenerImplBase( - Network::ListenerPtr&& listener, Network::ListenerConfig& config) - : listener_(std::move(listener)), stats_(generateStats(config.listenerScope())), + Network::ConnectionHandler& parent, Network::ListenerPtr&& listener, + Network::ListenerConfig& config) + : listener_(std::move(listener)), + stats_({ALL_LISTENER_STATS(POOL_COUNTER(config.listenerScope()), + POOL_GAUGE(config.listenerScope()), + POOL_HISTOGRAM(config.listenerScope()))}), + per_worker_stats_({ALL_PER_HANDLER_LISTENER_STATS( + POOL_COUNTER_PREFIX(config.listenerScope(), parent.statPrefix()), + POOL_GAUGE_PREFIX(config.listenerScope(), parent.statPrefix()))}), listener_filters_timeout_(config.listenerFiltersTimeout()), continue_on_listener_filters_timeout_(config.continueOnListenerFiltersTimeout()), listener_tag_(config.listenerTag()), config_(config) {} @@ -108,7 +115,8 @@ ConnectionHandlerImpl::ActiveTcpListener::ActiveTcpListener(ConnectionHandlerImp ConnectionHandlerImpl::ActiveTcpListener::ActiveTcpListener(ConnectionHandlerImpl& parent, Network::ListenerPtr&& listener, Network::ListenerConfig& config) - : ConnectionHandlerImpl::ActiveListenerImplBase(std::move(listener), config), parent_(parent) {} + : ConnectionHandlerImpl::ActiveListenerImplBase(parent, std::move(listener), config), + parent_(parent) {} ConnectionHandlerImpl::ActiveTcpListener::~ActiveTcpListener() { // Purge sockets that have not progressed to connections. 
This should only happen when @@ -165,11 +173,11 @@ ConnectionHandlerImpl::findActiveListenerByAddress(const Network::Address::Insta void ConnectionHandlerImpl::ActiveSocket::onTimeout() { listener_.stats_.downstream_pre_cx_timeout_.inc(); ASSERT(inserted()); - ENVOY_LOG_TO_LOGGER(listener_.parent_.logger_, debug, "listener filter times out after {} ms", - listener_.listener_filters_timeout_.count()); + ENVOY_LOG(debug, "listener filter times out after {} ms", + listener_.listener_filters_timeout_.count()); if (listener_.continue_on_listener_filters_timeout_) { - ENVOY_LOG_TO_LOGGER(listener_.parent_.logger_, debug, "fallback to default listener filter"); + ENVOY_LOG(debug, "fallback to default listener filter"); newConnection(); } unlink(); @@ -279,8 +287,7 @@ void ConnectionHandlerImpl::ActiveTcpListener::newConnection( // Find matching filter chain. const auto filter_chain = config_.filterChainManager().findFilterChain(*socket); if (filter_chain == nullptr) { - ENVOY_LOG_TO_LOGGER(parent_.logger_, debug, - "closing connection: no matching filter chain found"); + ENVOY_LOG(debug, "closing connection: no matching filter chain found"); stats_.no_filter_chain_match_.inc(); socket->close(); return; @@ -294,8 +301,7 @@ void ConnectionHandlerImpl::ActiveTcpListener::newConnection( const bool empty_filter_chain = !config_.filterChainFactory().createNetworkFilterChain( *new_connection, filter_chain->networkFilterFactories()); if (empty_filter_chain) { - ENVOY_CONN_LOG_TO_LOGGER(parent_.logger_, debug, "closing connection: no filters", - *new_connection); + ENVOY_CONN_LOG(debug, "closing connection: no filters", *new_connection); new_connection->close(Network::ConnectionCloseType::NoFlush); return; } @@ -305,7 +311,7 @@ void ConnectionHandlerImpl::ActiveTcpListener::newConnection( void ConnectionHandlerImpl::ActiveTcpListener::onNewConnection( Network::ConnectionPtr&& new_connection) { - ENVOY_CONN_LOG_TO_LOGGER(parent_.logger_, debug, "new connection", *new_connection); + 
ENVOY_CONN_LOG(debug, "new connection", *new_connection); // If the connection is already closed, we can just let this connection immediately die. if (new_connection->state() != Network::Connection::State::Closed) { @@ -327,27 +333,28 @@ ConnectionHandlerImpl::ActiveConnection::ActiveConnection(ActiveTcpListener& lis connection_->addConnectionCallbacks(*this); listener_.stats_.downstream_cx_total_.inc(); listener_.stats_.downstream_cx_active_.inc(); + listener_.per_worker_stats_.downstream_cx_total_.inc(); + listener_.per_worker_stats_.downstream_cx_active_.inc(); } ConnectionHandlerImpl::ActiveConnection::~ActiveConnection() { listener_.stats_.downstream_cx_active_.dec(); listener_.stats_.downstream_cx_destroy_.inc(); + listener_.per_worker_stats_.downstream_cx_active_.dec(); conn_length_->complete(); } -ListenerStats ConnectionHandlerImpl::generateStats(Stats::Scope& scope) { - return {ALL_LISTENER_STATS(POOL_COUNTER(scope), POOL_GAUGE(scope), POOL_HISTOGRAM(scope))}; -} - -ActiveUdpListener::ActiveUdpListener(Event::Dispatcher& dispatcher, Network::ListenerConfig& config) - : ActiveUdpListener(dispatcher.createUdpListener(config.socket(), *this), config) {} +ActiveUdpListener::ActiveUdpListener(Network::ConnectionHandler& parent, + Event::Dispatcher& dispatcher, Network::ListenerConfig& config) + : ActiveUdpListener(parent, dispatcher.createUdpListener(config.socket(), *this), config) {} -ActiveUdpListener::ActiveUdpListener(Network::ListenerPtr&& listener, +ActiveUdpListener::ActiveUdpListener(Network::ConnectionHandler& parent, + Network::ListenerPtr&& listener, Network::ListenerConfig& config) - : ConnectionHandlerImpl::ActiveListenerImplBase(std::move(listener), config), - udp_listener_(dynamic_cast(listener_.get())), read_filter_(nullptr) { + : ConnectionHandlerImpl::ActiveListenerImplBase(parent, std::move(listener), config), + udp_listener_(*dynamic_cast(listener_.get())), read_filter_(nullptr) { // TODO(sumukhs): Try to avoid dynamic_cast by coming up 
with a better interface design - ASSERT(udp_listener_ != nullptr, ""); + ASSERT(dynamic_cast(listener_.get()) != nullptr, ""); // Create the filter chain on creating a new udp listener config_.filterChainFactory().createUdpListenerFilterChain(*this, *this); @@ -380,7 +387,7 @@ void ActiveUdpListener::addReadFilter(Network::UdpListenerReadFilterPtr&& filter read_filter_ = std::move(filter); } -Network::UdpListener& ActiveUdpListener::udpListener() { return *udp_listener_; } +Network::UdpListener& ActiveUdpListener::udpListener() { return udp_listener_; } } // namespace Server } // namespace Envoy diff --git a/source/server/connection_handler_impl.h b/source/server/connection_handler_impl.h index ecf1946bf234..187d0252c2b0 100644 --- a/source/server/connection_handler_impl.h +++ b/source/server/connection_handler_impl.h @@ -23,12 +23,6 @@ #include "spdlog/spdlog.h" namespace Envoy { - -namespace Quic { -class ActiveQuicListener; -class EnvoyQuicDispatcher; -} // namespace Quic - namespace Server { #define ALL_LISTENER_STATS(COUNTER, GAUGE, HISTOGRAM) \ @@ -47,13 +41,26 @@ struct ListenerStats { ALL_LISTENER_STATS(GENERATE_COUNTER_STRUCT, GENERATE_GAUGE_STRUCT, GENERATE_HISTOGRAM_STRUCT) }; +#define ALL_PER_HANDLER_LISTENER_STATS(COUNTER, GAUGE) \ + COUNTER(downstream_cx_total) \ + GAUGE(downstream_cx_active, Accumulate) + +/** + * Wrapper struct for per-handler listener stats. @see stats_macros.h + */ +struct PerHandlerListenerStats { + ALL_PER_HANDLER_LISTENER_STATS(GENERATE_COUNTER_STRUCT, GENERATE_GAUGE_STRUCT) +}; + /** * Server side connection handler. This is used both by workers as well as the * main thread for non-threaded listeners. 
*/ -class ConnectionHandlerImpl : public Network::ConnectionHandler, NonCopyable { +class ConnectionHandlerImpl : public Network::ConnectionHandler, + NonCopyable, + Logger::Loggable { public: - ConnectionHandlerImpl(spdlog::logger& logger, Event::Dispatcher& dispatcher); + ConnectionHandlerImpl(Event::Dispatcher& dispatcher, const std::string& per_handler_stat_prefix); // Network::ConnectionHandler uint64_t numConnections() override { return num_connections_; } @@ -65,6 +72,7 @@ class ConnectionHandlerImpl : public Network::ConnectionHandler, NonCopyable { void stopListeners() override; void disableListeners() override; void enableListeners() override; + const std::string& statPrefix() override { return per_handler_stat_prefix_; } Network::Listener* findListenerByAddress(const Network::Address::Instance& address) override; @@ -76,7 +84,8 @@ class ConnectionHandlerImpl : public Network::ConnectionHandler, NonCopyable { */ class ActiveListenerImplBase : public Network::ConnectionHandler::ActiveListener { public: - ActiveListenerImplBase(Network::ListenerPtr&& listener, Network::ListenerConfig& config); + ActiveListenerImplBase(Network::ConnectionHandler& parent, Network::ListenerPtr&& listener, + Network::ListenerConfig& config); // Network::ConnectionHandler::ActiveListener. 
uint64_t listenerTag() override { return listener_tag_; } @@ -85,6 +94,7 @@ class ConnectionHandlerImpl : public Network::ConnectionHandler, NonCopyable { Network::ListenerPtr listener_; ListenerStats stats_; + PerHandlerListenerStats per_worker_stats_; const std::chrono::milliseconds listener_filters_timeout_; const bool continue_on_listener_filters_timeout_; const uint64_t listener_tag_; @@ -92,28 +102,19 @@ class ConnectionHandlerImpl : public Network::ConnectionHandler, NonCopyable { }; private: - class ActiveUdpListener; - using ActiveUdpListenerPtr = std::unique_ptr; - class ActiveTcpListener; - using ActiveTcpListenerPtr = std::unique_ptr; struct ActiveConnection; using ActiveConnectionPtr = std::unique_ptr; struct ActiveSocket; using ActiveSocketPtr = std::unique_ptr; - friend class Quic::ActiveQuicListener; - friend class Quic::EnvoyQuicDispatcher; - /** * Wrapper for an active tcp listener owned by this handler. */ class ActiveTcpListener : public Network::ListenerCallbacks, public ActiveListenerImplBase { public: ActiveTcpListener(ConnectionHandlerImpl& parent, Network::ListenerConfig& config); - ActiveTcpListener(ConnectionHandlerImpl& parent, Network::ListenerPtr&& listener, Network::ListenerConfig& config); - ~ActiveTcpListener() override; // Network::ListenerCallbacks @@ -205,10 +206,8 @@ class ConnectionHandlerImpl : public Network::ConnectionHandler, NonCopyable { Event::TimerPtr timer_; }; - static ListenerStats generateStats(Stats::Scope& scope); - - spdlog::logger& logger_; Event::Dispatcher& dispatcher_; + const std::string per_handler_stat_prefix_; std::list> listeners_; @@ -225,9 +224,10 @@ class ActiveUdpListener : public Network::UdpListenerCallbacks, public Network::UdpListenerFilterManager, public Network::UdpReadFilterCallbacks { public: - ActiveUdpListener(Event::Dispatcher& dispatcher, Network::ListenerConfig& config); - - ActiveUdpListener(Network::ListenerPtr&& listener, Network::ListenerConfig& config); + 
ActiveUdpListener(Network::ConnectionHandler& parent, Event::Dispatcher& dispatcher, + Network::ListenerConfig& config); + ActiveUdpListener(Network::ConnectionHandler& parent, Network::ListenerPtr&& listener, + Network::ListenerConfig& config); // Network::UdpListenerCallbacks void onData(Network::UdpRecvData& data) override; @@ -242,7 +242,7 @@ class ActiveUdpListener : public Network::UdpListenerCallbacks, Network::UdpListener& udpListener() override; private: - Network::UdpListener* udp_listener_; + Network::UdpListener& udp_listener_; Network::UdpListenerReadFilterPtr read_filter_; }; diff --git a/source/server/guarddog_impl.cc b/source/server/guarddog_impl.cc index 5b0b80163478..a856511f5654 100644 --- a/source/server/guarddog_impl.cc +++ b/source/server/guarddog_impl.cc @@ -19,9 +19,10 @@ namespace Server { GuardDogImpl::GuardDogImpl(Stats::Scope& stats_scope, const Server::Configuration::Main& config, Api::Api& api, std::unique_ptr&& test_interlock) - : test_interlock_hook_(std::move(test_interlock)), time_source_(api.timeSource()), - miss_timeout_(config.wdMissTimeout()), megamiss_timeout_(config.wdMegaMissTimeout()), - kill_timeout_(config.wdKillTimeout()), multi_kill_timeout_(config.wdMultiKillTimeout()), + : test_interlock_hook_(std::move(test_interlock)), stats_scope_(stats_scope), + time_source_(api.timeSource()), miss_timeout_(config.wdMissTimeout()), + megamiss_timeout_(config.wdMegaMissTimeout()), kill_timeout_(config.wdKillTimeout()), + multi_kill_timeout_(config.wdMultiKillTimeout()), loop_interval_([&]() -> std::chrono::milliseconds { // The loop interval is simply the minimum of all specified intervals, // but we must account for the 0=disabled case. 
This lambda takes care @@ -62,36 +63,38 @@ void GuardDogImpl::step() { bool seen_one_multi_timeout(false); Thread::LockGuard guard(wd_lock_); for (auto& watched_dog : watched_dogs_) { - const auto ltt = watched_dog.dog_->lastTouchTime(); + const auto ltt = watched_dog->dog_->lastTouchTime(); const auto delta = now - ltt; - if (watched_dog.last_alert_time_ && watched_dog.last_alert_time_.value() < ltt) { - watched_dog.miss_alerted_ = false; - watched_dog.megamiss_alerted_ = false; + if (watched_dog->last_alert_time_ && watched_dog->last_alert_time_.value() < ltt) { + watched_dog->miss_alerted_ = false; + watched_dog->megamiss_alerted_ = false; } if (delta > miss_timeout_) { - if (!watched_dog.miss_alerted_) { + if (!watched_dog->miss_alerted_) { watchdog_miss_counter_.inc(); - watched_dog.last_alert_time_ = ltt; - watched_dog.miss_alerted_ = true; + watched_dog->miss_counter_.inc(); + watched_dog->last_alert_time_ = ltt; + watched_dog->miss_alerted_ = true; } } if (delta > megamiss_timeout_) { - if (!watched_dog.megamiss_alerted_) { + if (!watched_dog->megamiss_alerted_) { watchdog_megamiss_counter_.inc(); - watched_dog.last_alert_time_ = ltt; - watched_dog.megamiss_alerted_ = true; + watched_dog->megamiss_counter_.inc(); + watched_dog->last_alert_time_ = ltt; + watched_dog->megamiss_alerted_ = true; } } if (killEnabled() && delta > kill_timeout_) { PANIC(fmt::format("GuardDog: one thread ({}) stuck for more than watchdog_kill_timeout", - watched_dog.dog_->threadId().debugString())); + watched_dog->dog_->threadId().debugString())); } if (multikillEnabled() && delta > multi_kill_timeout_) { if (seen_one_multi_timeout) { PANIC(fmt::format( "GuardDog: multiple threads ({},...) 
stuck for more than watchdog_multikill_timeout", - watched_dog.dog_->threadId().debugString())); + watched_dog->dog_->threadId().debugString())); } else { seen_one_multi_timeout = true; } @@ -108,19 +111,19 @@ void GuardDogImpl::step() { } } -WatchDogSharedPtr GuardDogImpl::createWatchDog(Thread::ThreadId thread_id) { +WatchDogSharedPtr GuardDogImpl::createWatchDog(Thread::ThreadId thread_id, + const std::string& thread_name) { // Timer started by WatchDog will try to fire at 1/2 of the interval of the // minimum timeout specified. loop_interval_ is const so all shared state // accessed out of the locked section below is const (time_source_ has no // state). - auto wd_interval = loop_interval_ / 2; + const auto wd_interval = loop_interval_ / 2; WatchDogSharedPtr new_watchdog = std::make_shared(std::move(thread_id), time_source_, wd_interval); - WatchedDog watched_dog; - watched_dog.dog_ = new_watchdog; + WatchedDogPtr watched_dog = std::make_unique(stats_scope_, thread_name, new_watchdog); { Thread::LockGuard guard(wd_lock_); - watched_dogs_.push_back(watched_dog); + watched_dogs_.push_back(std::move(watched_dog)); } new_watchdog->touch(); return new_watchdog; @@ -129,7 +132,7 @@ WatchDogSharedPtr GuardDogImpl::createWatchDog(Thread::ThreadId thread_id) { void GuardDogImpl::stopWatching(WatchDogSharedPtr wd) { Thread::LockGuard guard(wd_lock_); auto found_wd = std::find_if(watched_dogs_.begin(), watched_dogs_.end(), - [&wd](const WatchedDog& d) -> bool { return d.dog_ == wd; }); + [&wd](const WatchedDogPtr& d) -> bool { return d->dog_ == wd; }); if (found_wd != watched_dogs_.end()) { watched_dogs_.erase(found_wd); } else { @@ -156,5 +159,17 @@ void GuardDogImpl::stop() { } } +GuardDogImpl::WatchedDog::WatchedDog(Stats::Scope& stats_scope, const std::string& thread_name, + const WatchDogSharedPtr& watch_dog) + : dog_(watch_dog), + miss_counter_(stats_scope.counterFromStatName( + Stats::StatNameManagedStorage(fmt::format("server.{}.watchdog_miss", thread_name), + 
stats_scope.symbolTable()) + .statName())), + megamiss_counter_(stats_scope.counterFromStatName( + Stats::StatNameManagedStorage(fmt::format("server.{}.watchdog_mega_miss", thread_name), + stats_scope.symbolTable()) + .statName())) {} + } // namespace Server } // namespace Envoy diff --git a/source/server/guarddog_impl.h b/source/server/guarddog_impl.h index 6d50a2e23fed..3d17829e9a20 100644 --- a/source/server/guarddog_impl.h +++ b/source/server/guarddog_impl.h @@ -87,7 +87,8 @@ class GuardDogImpl : public GuardDog { } // Server::GuardDog - WatchDogSharedPtr createWatchDog(Thread::ThreadId thread_id) override; + WatchDogSharedPtr createWatchDog(Thread::ThreadId thread_id, + const std::string& thread_name) override; void stopWatching(WatchDogSharedPtr wd) override; private: @@ -100,13 +101,20 @@ class GuardDogImpl : public GuardDog { bool multikillEnabled() const { return multi_kill_timeout_ > std::chrono::milliseconds(0); } struct WatchedDog { - WatchDogSharedPtr dog_; + WatchedDog(Stats::Scope& stats_scope, const std::string& thread_name, + const WatchDogSharedPtr& watch_dog); + + const WatchDogSharedPtr dog_; absl::optional last_alert_time_; bool miss_alerted_{}; bool megamiss_alerted_{}; + Stats::Counter& miss_counter_; + Stats::Counter& megamiss_counter_; }; + using WatchedDogPtr = std::unique_ptr; std::unique_ptr test_interlock_hook_; + Stats::Scope& stats_scope_; TimeSource& time_source_; const std::chrono::milliseconds miss_timeout_; const std::chrono::milliseconds megamiss_timeout_; @@ -115,7 +123,7 @@ class GuardDogImpl : public GuardDog { const std::chrono::milliseconds loop_interval_; Stats::Counter& watchdog_miss_counter_; Stats::Counter& watchdog_megamiss_counter_; - std::vector watched_dogs_ ABSL_GUARDED_BY(wd_lock_); + std::vector watched_dogs_ ABSL_GUARDED_BY(wd_lock_); Thread::MutexBasicLockable wd_lock_; Thread::ThreadPtr thread_; Event::DispatcherPtr dispatcher_; diff --git a/source/server/listener_manager_impl.cc 
b/source/server/listener_manager_impl.cc index 2aa454b16d32..9c78c1ec41a7 100644 --- a/source/server/listener_manager_impl.cc +++ b/source/server/listener_manager_impl.cc @@ -444,7 +444,8 @@ ListenerManagerImpl::ListenerManagerImpl(Instance& server, "listeners", [this] { return dumpListenerConfigs(); })), enable_dispatcher_stats_(enable_dispatcher_stats) { for (uint32_t i = 0; i < server.options().concurrency(); i++) { - workers_.emplace_back(worker_factory.createWorker(server.overloadManager())); + workers_.emplace_back( + worker_factory.createWorker(server.overloadManager(), fmt::format("worker_{}", i))); } } diff --git a/source/server/server.cc b/source/server/server.cc index a92f83dd9620..0f0b8cd3c24e 100644 --- a/source/server/server.cc +++ b/source/server/server.cc @@ -64,7 +64,7 @@ InstanceImpl::InstanceImpl(const Options& options, Event::TimeSystem& time_syste api_(new Api::Impl(thread_factory, store, time_system, file_system)), dispatcher_(api_->allocateDispatcher()), singleton_manager_(new Singleton::ManagerImpl(api_->threadFactory())), - handler_(new ConnectionHandlerImpl(ENVOY_LOGGER(), *dispatcher_)), + handler_(new ConnectionHandlerImpl(*dispatcher_, "main_thread")), random_generator_(std::move(random_generator)), listener_component_factory_(*this), worker_factory_(thread_local_, *api_, hooks), dns_resolver_(dispatcher_->createDnsResolver({})), @@ -530,7 +530,8 @@ void InstanceImpl::run() { // Run the main dispatch loop waiting to exit. 
ENVOY_LOG(info, "starting main dispatch loop"); - auto watchdog = guard_dog_->createWatchDog(api_->threadFactory().currentThreadId()); + auto watchdog = + guard_dog_->createWatchDog(api_->threadFactory().currentThreadId(), "main_thread"); watchdog->startWatchdog(*dispatcher_); dispatcher_->post([this] { notifyCallbacksForStage(Stage::Startup); }); dispatcher_->run(Event::Dispatcher::RunType::Block); diff --git a/source/server/worker_impl.cc b/source/server/worker_impl.cc index bf38d2181d85..4c829abf1e9b 100644 --- a/source/server/worker_impl.cc +++ b/source/server/worker_impl.cc @@ -13,19 +13,21 @@ namespace Envoy { namespace Server { -WorkerPtr ProdWorkerFactory::createWorker(OverloadManager& overload_manager) { +WorkerPtr ProdWorkerFactory::createWorker(OverloadManager& overload_manager, + const std::string& worker_name) { Event::DispatcherPtr dispatcher(api_.allocateDispatcher()); return WorkerPtr{new WorkerImpl( tls_, hooks_, std::move(dispatcher), - Network::ConnectionHandlerPtr{new ConnectionHandlerImpl(ENVOY_LOGGER(), *dispatcher)}, - overload_manager, api_)}; + Network::ConnectionHandlerPtr{new ConnectionHandlerImpl(*dispatcher, worker_name)}, + overload_manager, api_, worker_name)}; } WorkerImpl::WorkerImpl(ThreadLocal::Instance& tls, ListenerHooks& hooks, Event::DispatcherPtr&& dispatcher, Network::ConnectionHandlerPtr handler, - OverloadManager& overload_manager, Api::Api& api) + OverloadManager& overload_manager, Api::Api& api, + const std::string& worker_name) : tls_(tls), hooks_(hooks), dispatcher_(std::move(dispatcher)), handler_(std::move(handler)), - api_(api) { + api_(api), worker_name_(worker_name) { tls_.registerThread(*dispatcher_, false); overload_manager.registerForAction( OverloadActionNames::get().StopAcceptingConnections, *dispatcher_, @@ -99,18 +101,22 @@ void WorkerImpl::stopListeners() { void WorkerImpl::threadRoutine(GuardDog& guard_dog) { ENVOY_LOG(debug, "worker entering dispatch loop"); - auto watchdog = 
guard_dog.createWatchDog(api_.threadFactory().currentThreadId()); - watchdog->startWatchdog(*dispatcher_); + // The watch dog must be created after the dispatcher starts running and has post events flushed, + // as this is when TLS stat scopes start working. + dispatcher_->post([this, &guard_dog]() { + watch_dog_ = guard_dog.createWatchDog(api_.threadFactory().currentThreadId(), worker_name_); + watch_dog_->startWatchdog(*dispatcher_); + }); dispatcher_->run(Event::Dispatcher::RunType::Block); ENVOY_LOG(debug, "worker exited dispatch loop"); - guard_dog.stopWatching(watchdog); + guard_dog.stopWatching(watch_dog_); // We must close all active connections before we actually exit the thread. This prevents any // destructors from running on the main thread which might reference thread locals. Destroying // the handler does this which additionally purges the dispatcher delayed deletion list. handler_.reset(); tls_.shutdownThread(); - watchdog.reset(); + watch_dog_.reset(); } void WorkerImpl::stopAcceptingConnectionsCb(OverloadActionState state) { diff --git a/source/server/worker_impl.h b/source/server/worker_impl.h index 3e56578303ca..0f27d0f6f824 100644 --- a/source/server/worker_impl.h +++ b/source/server/worker_impl.h @@ -23,7 +23,8 @@ class ProdWorkerFactory : public WorkerFactory, Logger::Loggable { public: WorkerImpl(ThreadLocal::Instance& tls, ListenerHooks& hooks, Event::DispatcherPtr&& dispatcher, Network::ConnectionHandlerPtr handler, OverloadManager& overload_manager, - Api::Api& api); + Api::Api& api, const std::string& worker_name); // Server::Worker void addListener(Network::ListenerConfig& listener, AddListenerCompletion completion) override; @@ -60,6 +61,8 @@ class WorkerImpl : public Worker, Logger::Loggable { Network::ConnectionHandlerPtr handler_; Api::Api& api_; Thread::ThreadPtr thread_; + const std::string worker_name_; + WatchDogSharedPtr watch_dog_; }; } // namespace Server diff --git a/test/common/event/dispatched_thread_impl_test.cc 
b/test/common/event/dispatched_thread_impl_test.cc deleted file mode 100644 index 704fbc354512..000000000000 --- a/test/common/event/dispatched_thread_impl_test.cc +++ /dev/null @@ -1,51 +0,0 @@ -#include - -#include "common/api/api_impl.h" -#include "common/common/utility.h" -#include "common/event/dispatched_thread.h" - -#include "server/guarddog_impl.h" - -#include "test/mocks/common.h" -#include "test/mocks/server/mocks.h" -#include "test/mocks/stats/mocks.h" -#include "test/test_common/utility.h" - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -using testing::InSequence; -using testing::NiceMock; - -namespace Envoy { -namespace Event { -namespace { - -class DispatchedThreadTest : public testing::Test { -protected: - DispatchedThreadTest() - : config_(1000, 1000, 1000, 1000), api_(Api::createApiForTest(fakestats_)), thread_(*api_), - guard_dog_(fakestats_, config_, *api_) {} - - void SetUp() override { thread_.start(guard_dog_); } - - NiceMock config_; - Stats::IsolatedStoreImpl fakestats_; - Api::ApiPtr api_; - DispatchedThreadImpl thread_; - Envoy::Server::GuardDogImpl guard_dog_; -}; - -TEST_F(DispatchedThreadTest, PostCallbackTest) { - InSequence s; - ReadyWatcher watcher; - - EXPECT_CALL(watcher, ready()); - thread_.dispatcher().post([&watcher]() { watcher.ready(); }); - - thread_.exit(); -} - -} // namespace -} // namespace Event -} // namespace Envoy diff --git a/test/extensions/quic_listeners/quiche/active_quic_listener_test.cc b/test/extensions/quic_listeners/quiche/active_quic_listener_test.cc index 6c27bade7766..ebfba7f3b464 100644 --- a/test/extensions/quic_listeners/quiche/active_quic_listener_test.cc +++ b/test/extensions/quic_listeners/quiche/active_quic_listener_test.cc @@ -53,7 +53,7 @@ class ActiveQuicListenerTest : public testing::TestWithParamcallbacks_->connection().addConnectionCallbacks( network_connection_callbacks_); }}), - connection_handler_(ENVOY_LOGGER(), *dispatcher_) { + connection_handler_(*dispatcher_, "test_thread") { 
EXPECT_CALL(listener_config_, listenerFiltersTimeout()); EXPECT_CALL(listener_config_, continueOnListenerFiltersTimeout()); EXPECT_CALL(listener_config_, listenerTag()); @@ -80,8 +80,8 @@ class ActiveQuicListenerTest : public testing::TestWithParam( - *dispatcher_, connection_handler_, ENVOY_LOGGER(), listener_config_, quic_config_); + quic_listener_ = std::make_unique(*dispatcher_, connection_handler_, + listener_config_, quic_config_); simulated_time_system_.sleep(std::chrono::milliseconds(100)); } diff --git a/test/extensions/quic_listeners/quiche/envoy_quic_dispatcher_test.cc b/test/extensions/quic_listeners/quiche/envoy_quic_dispatcher_test.cc index 60cd220f6978..b42ab88d606f 100644 --- a/test/extensions/quic_listeners/quiche/envoy_quic_dispatcher_test.cc +++ b/test/extensions/quic_listeners/quiche/envoy_quic_dispatcher_test.cc @@ -64,7 +64,7 @@ class EnvoyQuicDispatcherTest : public testing::TestWithParam(*dispatcher_), diff --git a/test/integration/fake_upstream.cc b/test/integration/fake_upstream.cc index f55806361987..9b8efad2b7d7 100644 --- a/test/integration/fake_upstream.cc +++ b/test/integration/fake_upstream.cc @@ -397,7 +397,7 @@ FakeUpstream::FakeUpstream(Network::TransportSocketFactoryPtr&& transport_socket : http_type_(type), socket_(std::move(listen_socket)), api_(Api::createApiForTest(stats_store_)), time_system_(time_system), dispatcher_(api_->allocateDispatcher()), - handler_(new Server::ConnectionHandlerImpl(ENVOY_LOGGER(), *dispatcher_)), + handler_(new Server::ConnectionHandlerImpl(*dispatcher_, "fake_upstream")), allow_unexpected_disconnects_(false), read_disable_on_new_connection_(true), enable_half_close_(enable_half_close), listener_(*this), filter_chain_(Network::Test::createEmptyFilterChain(std::move(transport_socket_factory))) { diff --git a/test/integration/integration_test.cc b/test/integration/integration_test.cc index 74a487a25e82..66496cf06e06 100644 --- a/test/integration/integration_test.cc +++ 
b/test/integration/integration_test.cc @@ -52,6 +52,25 @@ INSTANTIATE_TEST_SUITE_P(IpVersions, IntegrationTest, testing::ValuesIn(TestEnvironment::getIpVersionsForTest()), TestUtility::ipTestParamsToString); +// Make sure we have correctly specified per-worker performance stats. +TEST_P(IntegrationTest, PerWorkerStats) { + initialize(); + + // Per-worker listener stats. + if (GetParam() == Network::Address::IpVersion::v4) { + EXPECT_NE(nullptr, test_server_->counter("listener.127.0.0.1_0.worker_0.downstream_cx_total")); + } else { + EXPECT_NE(nullptr, test_server_->counter("listener.[__1]_0.worker_0.downstream_cx_total")); + } + + // Main thread admin listener stats. + EXPECT_NE(nullptr, test_server_->counter("listener.admin.main_thread.downstream_cx_total")); + + // Per-thread watchdog stats. + EXPECT_NE(nullptr, test_server_->counter("server.main_thread.watchdog_miss")); + EXPECT_NE(nullptr, test_server_->counter("server.worker_0.watchdog_miss")); +} + TEST_P(IntegrationTest, RouterDirectResponse) { const std::string body = "Response body"; const std::string file_path = TestEnvironment::writeStringToFileForTest("test_envoy", body); diff --git a/test/mocks/network/mocks.h b/test/mocks/network/mocks.h index 77d2c37d74c6..0fc8439779aa 100644 --- a/test/mocks/network/mocks.h +++ b/test/mocks/network/mocks.h @@ -345,6 +345,7 @@ class MockConnectionHandler : public ConnectionHandler { MOCK_METHOD0(stopListeners, void()); MOCK_METHOD0(disableListeners, void()); MOCK_METHOD0(enableListeners, void()); + MOCK_METHOD0(statPrefix, const std::string&()); }; class MockIp : public Address::Ip { diff --git a/test/mocks/server/mocks.cc b/test/mocks/server/mocks.cc index 92ba5f5456dc..87263612946e 100644 --- a/test/mocks/server/mocks.cc +++ b/test/mocks/server/mocks.cc @@ -72,7 +72,7 @@ MockWatchDog::MockWatchDog() = default; MockWatchDog::~MockWatchDog() = default; MockGuardDog::MockGuardDog() : watch_dog_(new NiceMock()) { - ON_CALL(*this, 
createWatchDog(_)).WillByDefault(Return(watch_dog_)); + ON_CALL(*this, createWatchDog(_, _)).WillByDefault(Return(watch_dog_)); } MockGuardDog::~MockGuardDog() = default; diff --git a/test/mocks/server/mocks.h b/test/mocks/server/mocks.h index b8398a630b50..b12cffe483a6 100644 --- a/test/mocks/server/mocks.h +++ b/test/mocks/server/mocks.h @@ -193,7 +193,8 @@ class MockGuardDog : public GuardDog { ~MockGuardDog() override; // Server::GuardDog - MOCK_METHOD1(createWatchDog, WatchDogSharedPtr(Thread::ThreadId)); + MOCK_METHOD2(createWatchDog, + WatchDogSharedPtr(Thread::ThreadId thread_id, const std::string& thread_name)); MOCK_METHOD1(stopWatching, void(WatchDogSharedPtr wd)); std::shared_ptr watch_dog_; @@ -293,7 +294,9 @@ class MockWorkerFactory : public WorkerFactory { ~MockWorkerFactory() override; // Server::WorkerFactory - WorkerPtr createWorker(OverloadManager&) override { return WorkerPtr{createWorker_()}; } + WorkerPtr createWorker(OverloadManager&, const std::string&) override { + return WorkerPtr{createWorker_()}; + } MOCK_METHOD0(createWorker_, Worker*()); }; diff --git a/test/server/connection_handler_test.cc b/test/server/connection_handler_test.cc index a6dadb2b9e54..40d9f89489bb 100644 --- a/test/server/connection_handler_test.cc +++ b/test/server/connection_handler_test.cc @@ -32,7 +32,7 @@ namespace { class ConnectionHandlerTest : public testing::Test, protected Logger::Loggable { public: ConnectionHandlerTest() - : handler_(new ConnectionHandlerImpl(ENVOY_LOGGER(), dispatcher_)), + : handler_(new ConnectionHandlerImpl(dispatcher_, "test")), filter_chain_(Network::Test::createEmptyFilterChainWithRawBufferSockets()) {} class TestListener : public Network::ListenerConfig, public LinkedObject { @@ -379,7 +379,18 @@ TEST_F(ConnectionHandlerTest, NormalRedirect) { EXPECT_CALL(dispatcher_, createServerConnection_(_, _)).WillOnce(Return(connection)); EXPECT_CALL(factory_, createNetworkFilterChain(_, _)).WillOnce(Return(true)); 
listener_callbacks1->onAccept(Network::ConnectionSocketPtr{accepted_socket}, true); + + // Verify per-listener connection stats. EXPECT_EQ(1UL, handler_->numConnections()); + EXPECT_EQ(1UL, TestUtility::findCounter(stats_store_, "downstream_cx_total")->value()); + EXPECT_EQ(1UL, TestUtility::findGauge(stats_store_, "downstream_cx_active")->value()); + EXPECT_EQ(1UL, TestUtility::findCounter(stats_store_, "test.downstream_cx_total")->value()); + EXPECT_EQ(1UL, TestUtility::findGauge(stats_store_, "test.downstream_cx_active")->value()); + + connection->close(Network::ConnectionCloseType::NoFlush); + dispatcher_.clearDeferredDeleteList(); + EXPECT_EQ(0UL, TestUtility::findGauge(stats_store_, "downstream_cx_active")->value()); + EXPECT_EQ(0UL, TestUtility::findGauge(stats_store_, "test.downstream_cx_active")->value()); EXPECT_CALL(*listener2, onDestroy()); EXPECT_CALL(*listener1, onDestroy()); diff --git a/test/server/guarddog_impl_test.cc b/test/server/guarddog_impl_test.cc index 66f5b117121f..5d04fe52bc5e 100644 --- a/test/server/guarddog_impl_test.cc +++ b/test/server/guarddog_impl_test.cc @@ -98,7 +98,7 @@ class GuardDogDeathTest : public GuardDogTestBase { void SetupForDeath() { InSequence s; initGuardDog(fakestats_, config_kill_); - unpet_dog_ = guard_dog_->createWatchDog(api_->threadFactory().currentThreadId()); + unpet_dog_ = guard_dog_->createWatchDog(api_->threadFactory().currentThreadId(), "test_thread"); guard_dog_->forceCheckForTest(); time_system_->sleep(std::chrono::milliseconds(99)); // 1 ms shy of death. 
} @@ -110,9 +110,11 @@ class GuardDogDeathTest : public GuardDogTestBase { void SetupForMultiDeath() { InSequence s; initGuardDog(fakestats_, config_multikill_); - auto unpet_dog_ = guard_dog_->createWatchDog(api_->threadFactory().currentThreadId()); + auto unpet_dog_ = + guard_dog_->createWatchDog(api_->threadFactory().currentThreadId(), "test_thread"); guard_dog_->forceCheckForTest(); - auto second_dog_ = guard_dog_->createWatchDog(api_->threadFactory().currentThreadId()); + auto second_dog_ = + guard_dog_->createWatchDog(api_->threadFactory().currentThreadId(), "test_thread"); guard_dog_->forceCheckForTest(); time_system_->sleep(std::chrono::milliseconds(499)); // 1 ms shy of multi-death. } @@ -177,8 +179,9 @@ TEST_P(GuardDogAlmostDeadTest, NearDeathTest) { // there is no death. The positive case is covered in MultiKillDeathTest. InSequence s; initGuardDog(fakestats_, config_multikill_); - auto unpet_dog = guard_dog_->createWatchDog(api_->threadFactory().currentThreadId()); - auto pet_dog = guard_dog_->createWatchDog(api_->threadFactory().currentThreadId()); + auto unpet_dog = + guard_dog_->createWatchDog(api_->threadFactory().currentThreadId(), "test_thread"); + auto pet_dog = guard_dog_->createWatchDog(api_->threadFactory().currentThreadId(), "test_thread"); // This part "waits" 600 milliseconds while one dog is touched every 100, and // the other is not. 
600ms is over the threshold of 500ms for multi-kill but // only one is nonresponsive, so there should be no kill (single kill @@ -194,6 +197,19 @@ class GuardDogMissTest : public GuardDogTestBase { protected: GuardDogMissTest() : config_miss_(500, 1000, 0, 0), config_mega_(1000, 500, 0, 0) {} + void checkMiss(uint64_t count) { + EXPECT_EQ(count, TestUtility::findCounter(stats_store_, "server.watchdog_miss")->value()); + EXPECT_EQ(count, + TestUtility::findCounter(stats_store_, "server.test_thread.watchdog_miss")->value()); + } + + void checkMegaMiss(uint64_t count) { + EXPECT_EQ(count, TestUtility::findCounter(stats_store_, "server.watchdog_mega_miss")->value()); + EXPECT_EQ( + count, + TestUtility::findCounter(stats_store_, "server.test_thread.watchdog_mega_miss")->value()); + } + NiceMock config_miss_; NiceMock config_mega_; }; @@ -205,17 +221,18 @@ TEST_P(GuardDogMissTest, MissTest) { // This test checks the actual collected statistics after doing some timer // advances that should and shouldn't increment the counters. 
initGuardDog(stats_store_, config_miss_); + auto unpet_dog = + guard_dog_->createWatchDog(api_->threadFactory().currentThreadId(), "test_thread"); // We'd better start at 0: - EXPECT_EQ(0UL, stats_store_.counter("server.watchdog_miss").value()); - auto unpet_dog = guard_dog_->createWatchDog(api_->threadFactory().currentThreadId()); + checkMiss(0); // At 300ms we shouldn't have hit the timeout yet: time_system_->sleep(std::chrono::milliseconds(300)); guard_dog_->forceCheckForTest(); - EXPECT_EQ(0UL, stats_store_.counter("server.watchdog_miss").value()); + checkMiss(0); // This should push it past the 500ms limit: time_system_->sleep(std::chrono::milliseconds(250)); guard_dog_->forceCheckForTest(); - EXPECT_EQ(1UL, stats_store_.counter("server.watchdog_miss").value()); + checkMiss(1); guard_dog_->stopWatching(unpet_dog); unpet_dog = nullptr; } @@ -229,17 +246,18 @@ TEST_P(GuardDogMissTest, MegaMissTest) { // This test checks the actual collected statistics after doing some timer // advances that should and shouldn't increment the counters. 
initGuardDog(stats_store_, config_mega_); - auto unpet_dog = guard_dog_->createWatchDog(api_->threadFactory().currentThreadId()); + auto unpet_dog = + guard_dog_->createWatchDog(api_->threadFactory().currentThreadId(), "test_thread"); // We'd better start at 0: - EXPECT_EQ(0UL, stats_store_.counter("server.watchdog_mega_miss").value()); + checkMegaMiss(0); // This shouldn't be enough to increment the stat: time_system_->sleep(std::chrono::milliseconds(499)); guard_dog_->forceCheckForTest(); - EXPECT_EQ(0UL, stats_store_.counter("server.watchdog_mega_miss").value()); + checkMegaMiss(0); // Just 2ms more will make it greater than 500ms timeout: time_system_->sleep(std::chrono::milliseconds(2)); guard_dog_->forceCheckForTest(); - EXPECT_EQ(1UL, stats_store_.counter("server.watchdog_mega_miss").value()); + checkMegaMiss(1); guard_dog_->stopWatching(unpet_dog); unpet_dog = nullptr; } @@ -254,7 +272,8 @@ TEST_P(GuardDogMissTest, MissCountTest) { // spurious condition_variable wakeup causes the counter to get incremented // more than it should be. initGuardDog(stats_store_, config_miss_); - auto sometimes_pet_dog = guard_dog_->createWatchDog(api_->threadFactory().currentThreadId()); + auto sometimes_pet_dog = + guard_dog_->createWatchDog(api_->threadFactory().currentThreadId(), "test_thread"); // These steps are executed once without ever touching the watchdog. // Then the last step is to touch the watchdog and repeat the steps. // This verifies that the behavior is reset back to baseline after a touch. 
@@ -263,17 +282,17 @@ TEST_P(GuardDogMissTest, MissCountTest) { // This shouldn't be enough to increment the stat: time_system_->sleep(std::chrono::milliseconds(499)); guard_dog_->forceCheckForTest(); - EXPECT_EQ(i, stats_store_.counter("server.watchdog_miss").value()); + checkMiss(i); // And if we force re-execution of the loop it still shouldn't be: guard_dog_->forceCheckForTest(); - EXPECT_EQ(i, stats_store_.counter("server.watchdog_miss").value()); + checkMiss(i); // Just 2ms more will make it greater than 500ms timeout: time_system_->sleep(std::chrono::milliseconds(2)); guard_dog_->forceCheckForTest(); - EXPECT_EQ(i + 1, stats_store_.counter("server.watchdog_miss").value()); + checkMiss(i + 1); // Spurious wakeup, we should still only have one miss counted. guard_dog_->forceCheckForTest(); - EXPECT_EQ(i + 1, stats_store_.counter("server.watchdog_miss").value()); + checkMiss(i + 1); // When we finally touch the dog we should get one more increment once the // timeout value expires: sometimes_pet_dog->touch(); @@ -281,10 +300,10 @@ TEST_P(GuardDogMissTest, MissCountTest) { time_system_->sleep(std::chrono::milliseconds(1000)); sometimes_pet_dog->touch(); // Make sure megamiss still works: - EXPECT_EQ(0UL, stats_store_.counter("server.watchdog_mega_miss").value()); + checkMegaMiss(0UL); time_system_->sleep(std::chrono::milliseconds(1500)); guard_dog_->forceCheckForTest(); - EXPECT_EQ(1UL, stats_store_.counter("server.watchdog_mega_miss").value()); + checkMegaMiss(1UL); guard_dog_->stopWatching(sometimes_pet_dog); sometimes_pet_dog = nullptr; @@ -314,7 +333,8 @@ TEST_P(GuardDogTestBase, WatchDogThreadIdTest) { NiceMock stats; NiceMock config(100, 90, 1000, 500); initGuardDog(stats, config); - auto watched_dog = guard_dog_->createWatchDog(api_->threadFactory().currentThreadId()); + auto watched_dog = + guard_dog_->createWatchDog(api_->threadFactory().currentThreadId(), "test_thread"); EXPECT_EQ(watched_dog->threadId().debugString(), 
api_->threadFactory().currentThreadId().debugString()); guard_dog_->stopWatching(watched_dog); diff --git a/test/server/worker_impl_test.cc b/test/server/worker_impl_test.cc index 1c9273ddb12c..c7bc36e70941 100644 --- a/test/server/worker_impl_test.cc +++ b/test/server/worker_impl_test.cc @@ -28,7 +28,7 @@ class WorkerImplTest : public testing::Test { : api_(Api::createApiForTest()), dispatcher_(api_->allocateDispatcher()), no_exit_timer_(dispatcher_->createTimer([]() -> void {})), worker_(tls_, hooks_, std::move(dispatcher_), Network::ConnectionHandlerPtr{handler_}, - overload_manager_, *api_) { + overload_manager_, *api_, "worker_test") { // In the real worker the watchdog has timers that prevent exit. Here we need to prevent event // loop exit since we use mock timers. no_exit_timer_->enableTimer(std::chrono::hours(1)); From 6e4c1e9ce6bb7e289c56f1620af031b1e8260887 Mon Sep 17 00:00:00 2001 From: Matt Klein Date: Mon, 16 Sep 2019 21:09:31 -0700 Subject: [PATCH 2/6] fix Signed-off-by: Matt Klein --- source/common/event/BUILD | 17 ----------------- test/common/event/BUILD | 16 ---------------- 2 files changed, 33 deletions(-) diff --git a/source/common/event/BUILD b/source/common/event/BUILD index 76c0dc627cc2..612bcfcf31ec 100644 --- a/source/common/event/BUILD +++ b/source/common/event/BUILD @@ -123,20 +123,3 @@ envoy_cc_library( "//source/common/common:scope_tracker", ], ) - -envoy_cc_library( - name = "dispatched_thread_lib", - srcs = ["dispatched_thread.cc"], - hdrs = ["dispatched_thread.h"], - external_deps = [ - "event", - ], - deps = [ - ":dispatcher_lib", - "//include/envoy/api:api_interface", - "//include/envoy/event:dispatcher_interface", - "//source/common/common:minimal_logger_lib", - "//source/common/common:thread_lib", - "//source/server:guarddog_lib", - ], -) diff --git a/test/common/event/BUILD b/test/common/event/BUILD index 4e432fb57625..f3215f14dd1d 100644 --- a/test/common/event/BUILD +++ b/test/common/event/BUILD @@ -35,19 +35,3 @@ 
envoy_cc_test( "//test/test_common:utility_lib", ], ) - -envoy_cc_test( - name = "dispatched_thread_impl_test", - srcs = ["dispatched_thread_impl_test.cc"], - deps = [ - "//source/common/api:api_lib", - "//source/common/common:utility_lib", - "//source/common/event:dispatched_thread_lib", - "//source/server:guarddog_lib", - "//test/mocks:common_lib", - "//test/mocks/server:server_mocks", - "//test/mocks/stats:stats_mocks", - "//test/test_common:test_time_lib", - "//test/test_common:utility_lib", - ], -) From 5c8720931da4b4ebded36999195579ff233f7865 Mon Sep 17 00:00:00 2001 From: Matt Klein Date: Tue, 17 Sep 2019 08:46:24 -0700 Subject: [PATCH 3/6] fix Signed-off-by: Matt Klein --- .../filters/listener/proxy_protocol/proxy_protocol_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/extensions/filters/listener/proxy_protocol/proxy_protocol_test.cc b/test/extensions/filters/listener/proxy_protocol/proxy_protocol_test.cc index 683bdfc1c5fd..bfec9121e16f 100644 --- a/test/extensions/filters/listener/proxy_protocol/proxy_protocol_test.cc +++ b/test/extensions/filters/listener/proxy_protocol/proxy_protocol_test.cc @@ -51,7 +51,7 @@ class ProxyProtocolTest : public testing::TestWithParamallocateDispatcher()), socket_(Network::Test::getCanonicalLoopbackAddress(GetParam()), nullptr, true), - connection_handler_(new Server::ConnectionHandlerImpl(ENVOY_LOGGER(), *dispatcher_)), + connection_handler_(new Server::ConnectionHandlerImpl(*dispatcher_, "test_thread")), name_("proxy"), filter_chain_(Network::Test::createEmptyFilterChainWithRawBufferSockets()) { connection_handler_->addListener(*this); @@ -890,7 +890,7 @@ class WildcardProxyProtocolTest : public testing::TestWithParamip()->port())), - connection_handler_(new Server::ConnectionHandlerImpl(ENVOY_LOGGER(), *dispatcher_)), + connection_handler_(new Server::ConnectionHandlerImpl(*dispatcher_, "test_thread")), name_("proxy"), 
filter_chain_(Network::Test::createEmptyFilterChainWithRawBufferSockets()) { connection_handler_->addListener(*this); conn_ = dispatcher_->createClientConnection(local_dst_address_, From 98fea15f1efe7d32592c7fc980cd343292ff6bf1 Mon Sep 17 00:00:00 2001 From: Matt Klein Date: Wed, 18 Sep 2019 20:37:22 -0700 Subject: [PATCH 4/6] comment Signed-off-by: Matt Klein --- docs/root/configuration/listeners/stats.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/root/configuration/listeners/stats.rst b/docs/root/configuration/listeners/stats.rst index 4615698df1e5..5686bf3b4f45 100644 --- a/docs/root/configuration/listeners/stats.rst +++ b/docs/root/configuration/listeners/stats.rst @@ -50,7 +50,7 @@ on either accepted or active connections. :widths: 1, 1, 2 downstream_cx_total, Counter, Total connections on this handler. - downstream_cx_active, Counter, Total active connections on this handler. + downstream_cx_active, Gauge, Total active connections on this handler. Listener manager ---------------- From 5bdfb012ecd028272eeace29300e52e971f5e574 Mon Sep 17 00:00:00 2001 From: Matt Klein Date: Wed, 18 Sep 2019 20:46:31 -0700 Subject: [PATCH 5/6] format Signed-off-by: Matt Klein --- source/server/connection_handler_impl.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/source/server/connection_handler_impl.cc b/source/server/connection_handler_impl.cc index 8e4ae8d07967..f21c1715821a 100644 --- a/source/server/connection_handler_impl.cc +++ b/source/server/connection_handler_impl.cc @@ -34,8 +34,7 @@ void ConnectionHandlerImpl::addListener(Network::ListenerConfig& config) { listener = std::make_unique(*this, config); } else { ASSERT(config.udpListenerFactory() != nullptr, "UDP listener factory is not initialized."); - listener = - config.udpListenerFactory()->createActiveUdpListener(*this, dispatcher_, config); + listener = config.udpListenerFactory()->createActiveUdpListener(*this, dispatcher_, config); } if (disable_listeners_) { 
From e1a0c99fe481dcb76e3900e0a62718df99056ac5 Mon Sep 17 00:00:00 2001 From: Matt Klein Date: Fri, 20 Sep 2019 09:36:21 -0700 Subject: [PATCH 6/6] comments Signed-off-by: Matt Klein --- .../quiche/active_quic_listener_test.cc | 3 +- test/integration/integration_test.cc | 4 ++ test/server/guarddog_impl_test.cc | 40 ++++++++++--------- 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/test/extensions/quic_listeners/quiche/active_quic_listener_test.cc b/test/extensions/quic_listeners/quiche/active_quic_listener_test.cc index ebfba7f3b464..03e32d788b62 100644 --- a/test/extensions/quic_listeners/quiche/active_quic_listener_test.cc +++ b/test/extensions/quic_listeners/quiche/active_quic_listener_test.cc @@ -42,8 +42,7 @@ class ActiveQuicListenerPeer { } }; -class ActiveQuicListenerTest : public testing::TestWithParam, - protected Logger::Loggable { +class ActiveQuicListenerTest : public testing::TestWithParam { public: ActiveQuicListenerTest() : version_(GetParam()), api_(Api::createApiForTest(simulated_time_system_)), diff --git a/test/integration/integration_test.cc b/test/integration/integration_test.cc index 66496cf06e06..f42e5c11debf 100644 --- a/test/integration/integration_test.cc +++ b/test/integration/integration_test.cc @@ -53,6 +53,10 @@ INSTANTIATE_TEST_SUITE_P(IpVersions, IntegrationTest, TestUtility::ipTestParamsToString); // Make sure we have correctly specified per-worker performance stats. +// TODO(mattklein123): We should flesh this test out to a) actually use more than 1 worker and +// b) do some real requests and verify things work correctly on a per-worker basis. I will do this +// in my next change when I add optional CX balancing as it will then be easier to write a +// deterministic test.
TEST_P(IntegrationTest, PerWorkerStats) { initialize(); diff --git a/test/server/guarddog_impl_test.cc b/test/server/guarddog_impl_test.cc index 5d04fe52bc5e..011ba8dbfcb8 100644 --- a/test/server/guarddog_impl_test.cc +++ b/test/server/guarddog_impl_test.cc @@ -197,17 +197,21 @@ class GuardDogMissTest : public GuardDogTestBase { protected: GuardDogMissTest() : config_miss_(500, 1000, 0, 0), config_mega_(1000, 500, 0, 0) {} - void checkMiss(uint64_t count) { - EXPECT_EQ(count, TestUtility::findCounter(stats_store_, "server.watchdog_miss")->value()); + void checkMiss(uint64_t count, const std::string& descriptor) { + EXPECT_EQ(count, TestUtility::findCounter(stats_store_, "server.watchdog_miss")->value()) + << descriptor; EXPECT_EQ(count, - TestUtility::findCounter(stats_store_, "server.test_thread.watchdog_miss")->value()); + TestUtility::findCounter(stats_store_, "server.test_thread.watchdog_miss")->value()) + << descriptor; } - void checkMegaMiss(uint64_t count) { - EXPECT_EQ(count, TestUtility::findCounter(stats_store_, "server.watchdog_mega_miss")->value()); + void checkMegaMiss(uint64_t count, const std::string& descriptor) { + EXPECT_EQ(count, TestUtility::findCounter(stats_store_, "server.watchdog_mega_miss")->value()) + << descriptor; EXPECT_EQ( count, - TestUtility::findCounter(stats_store_, "server.test_thread.watchdog_mega_miss")->value()); + TestUtility::findCounter(stats_store_, "server.test_thread.watchdog_mega_miss")->value()) + << descriptor; } NiceMock config_miss_; @@ -224,15 +228,15 @@ TEST_P(GuardDogMissTest, MissTest) { auto unpet_dog = guard_dog_->createWatchDog(api_->threadFactory().currentThreadId(), "test_thread"); // We'd better start at 0: - checkMiss(0); + checkMiss(0, "MissTest check 1"); // At 300ms we shouldn't have hit the timeout yet: time_system_->sleep(std::chrono::milliseconds(300)); guard_dog_->forceCheckForTest(); - checkMiss(0); + checkMiss(0, "MissTest check 2"); // This should push it past the 500ms limit: 
time_system_->sleep(std::chrono::milliseconds(250)); guard_dog_->forceCheckForTest(); - checkMiss(1); + checkMiss(1, "MissTest check 3"); guard_dog_->stopWatching(unpet_dog); unpet_dog = nullptr; } @@ -249,15 +253,15 @@ TEST_P(GuardDogMissTest, MegaMissTest) { auto unpet_dog = guard_dog_->createWatchDog(api_->threadFactory().currentThreadId(), "test_thread"); // We'd better start at 0: - checkMegaMiss(0); + checkMegaMiss(0, "MegaMissTest check 1"); // This shouldn't be enough to increment the stat: time_system_->sleep(std::chrono::milliseconds(499)); guard_dog_->forceCheckForTest(); - checkMegaMiss(0); + checkMegaMiss(0, "MegaMissTest check 2"); // Just 2ms more will make it greater than 500ms timeout: time_system_->sleep(std::chrono::milliseconds(2)); guard_dog_->forceCheckForTest(); - checkMegaMiss(1); + checkMegaMiss(1, "MegaMissTest check 3"); guard_dog_->stopWatching(unpet_dog); unpet_dog = nullptr; } @@ -282,17 +286,17 @@ TEST_P(GuardDogMissTest, MissCountTest) { // This shouldn't be enough to increment the stat: time_system_->sleep(std::chrono::milliseconds(499)); guard_dog_->forceCheckForTest(); - checkMiss(i); + checkMiss(i, "MissCountTest check 1"); // And if we force re-execution of the loop it still shouldn't be: guard_dog_->forceCheckForTest(); - checkMiss(i); + checkMiss(i, "MissCountTest check 2"); // Just 2ms more will make it greater than 500ms timeout: time_system_->sleep(std::chrono::milliseconds(2)); guard_dog_->forceCheckForTest(); - checkMiss(i + 1); + checkMiss(i + 1, "MissCountTest check 3"); // Spurious wakeup, we should still only have one miss counted. 
guard_dog_->forceCheckForTest(); - checkMiss(i + 1); + checkMiss(i + 1, "MissCountTest check 4"); // When we finally touch the dog we should get one more increment once the // timeout value expires: sometimes_pet_dog->touch(); @@ -300,10 +304,10 @@ TEST_P(GuardDogMissTest, MissCountTest) { time_system_->sleep(std::chrono::milliseconds(1000)); sometimes_pet_dog->touch(); // Make sure megamiss still works: - checkMegaMiss(0UL); + checkMegaMiss(0UL, "MissCountTest check 5"); time_system_->sleep(std::chrono::milliseconds(1500)); guard_dog_->forceCheckForTest(); - checkMegaMiss(1UL); + checkMegaMiss(1UL, "MissCountTest check 6"); guard_dog_->stopWatching(sometimes_pet_dog); sometimes_pet_dog = nullptr;