Skip to content

Commit

Permalink
TCP listener connection rebalancing
Browse files Browse the repository at this point in the history
This commit introduces optional connection rebalancing
for TCP listeners, targeted as cases where there are a
small number of long lived connections such as service
mesh HTTP2/gRPC egress.

Part of this change involved tracking connection counts
at the per-listener level, which made it clear that we
have quite a bit of tech debt in some of our interfaces
in this area. I did various cleanups in service of this
change which leave the connection handler / accept path
in a cleaner state.

Fixes #4602

Signed-off-by: Matt Klein <[email protected]>
  • Loading branch information
mattklein123 committed Sep 29, 2019
1 parent ef9139a commit c1ca210
Show file tree
Hide file tree
Showing 51 changed files with 832 additions and 595 deletions.
26 changes: 25 additions & 1 deletion api/envoy/api/v2/lds.proto
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ service ListenerDiscoveryService {
}
}

// [#comment:next free field: 20]
// [#comment:next free field: 21]
message Listener {
enum DrainType {
// Drain in response to calling /healthcheck/fail admin endpoint (along with the health check
Expand Down Expand Up @@ -70,6 +70,25 @@ message Listener {
google.protobuf.BoolValue bind_to_port = 1;
}

// Configuration for listener connection balancing.
message ConnectionBalanceConfig {
// A connection balancer implementation that does exact balancing. This means that a lock is
// held during balancing so that connection counts are nearly exactly balanced between worker
// threads. This is "nearly" exact in the sense that a connection might close in parallel thus
// making the counts incorrect, but this should be rectified on the next accept. This balancer
// sacrifices accept throughput for accuracy and should be used when there are a small number of
// connections that rarely cycle (e.g., service mesh gRPC egress).
message ExactBalance {
}

oneof balance_type {
option (validate.required) = true;

// If specified, the listener will use the exact connection balancer.
ExactBalance exact_balance = 1;
}
}

reserved 14;

// The unique name by which this listener is known. If no name is provided,
Expand Down Expand Up @@ -214,4 +233,9 @@ message Listener {
// socket listener and the various types of API listener. That way, a given Listener message
// can structurally only contain the fields of the relevant type.]
config.listener.v2.ApiListener api_listener = 19;

// The listener's connection balancer configuration, currently only applicable to TCP listeners.
// If no configuration is specified, Envoy will not attempt to balance active connections between
// worker threads.
ConnectionBalanceConfig connection_balance_config = 20;
}
2 changes: 2 additions & 0 deletions api/envoy/api/v2/listener/quic_config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ option ruby_package = "Envoy.Api.V2.ListenerNS";
import "google/protobuf/duration.proto";
import "google/protobuf/wrappers.proto";

// [#protodoc-title: QUIC listener Config]

// Configuration specific to the QUIC protocol.
// Next id: 4
message QuicProtocolOptions {
Expand Down
2 changes: 1 addition & 1 deletion api/envoy/api/v2/listener/udp_listener_config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ option ruby_package = "Envoy.Api.V2.ListenerNS";
import "google/protobuf/any.proto";
import "google/protobuf/struct.proto";

// [#protodoc-title: Udp Listener Config]
// [#protodoc-title: UDP Listener Config]
// Listener :ref:`configuration overview <config_listeners>`

message UdpListenerConfig {
Expand Down
26 changes: 25 additions & 1 deletion api/envoy/api/v3alpha/lds.proto
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ service ListenerDiscoveryService {
}
}

// [#comment:next free field: 20]
// [#comment:next free field: 21]
message Listener {
enum DrainType {
// Drain in response to calling /healthcheck/fail admin endpoint (along with the health check
Expand Down Expand Up @@ -70,6 +70,25 @@ message Listener {
google.protobuf.BoolValue bind_to_port = 1;
}

// Configuration for listener connection balancing.
message ConnectionBalanceConfig {
// A connection balancer implementation that does exact balancing. This means that a lock is
// held during balancing so that connection counts are nearly exactly balanced between worker
// threads. This is "nearly" exact in the sense that a connection might close in parallel thus
// making the counts incorrect, but this should be rectified on the next accept. This balancer
// sacrifices accept throughput for accuracy and should be used when there are a small number of
// connections that rarely cycle (e.g., service mesh gRPC egress).
message ExactBalance {
}

oneof balance_type {
option (validate.required) = true;

// If specified, the listener will use the exact connection balancer.
ExactBalance exact_balance = 1;
}
}

reserved 14, 4;

reserved "use_original_dst";
Expand Down Expand Up @@ -199,4 +218,9 @@ message Listener {
// socket listener and the various types of API listener. That way, a given Listener message
// can structurally only contain the fields of the relevant type.]
config.listener.v3alpha.ApiListener api_listener = 19;

// The listener's connection balancer configuration, currently only applicable to TCP listeners.
// If no configuration is specified, Envoy will not attempt to balance active connections between
// worker threads.
ConnectionBalanceConfig connection_balance_config = 20;
}
2 changes: 2 additions & 0 deletions api/envoy/api/v3alpha/listener/quic_config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ option java_package = "io.envoyproxy.envoy.api.v3alpha.listener";
import "google/protobuf/duration.proto";
import "google/protobuf/wrappers.proto";

// [#protodoc-title: QUIC listener Config]

// Configuration specific to the QUIC protocol.
// Next id: 4
message QuicProtocolOptions {
Expand Down
2 changes: 1 addition & 1 deletion api/envoy/api/v3alpha/listener/udp_listener_config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ option java_package = "io.envoyproxy.envoy.api.v3alpha.listener";
import "google/protobuf/any.proto";
import "google/protobuf/struct.proto";

// [#protodoc-title: Udp Listener Config]
// [#protodoc-title: UDP Listener Config]
// Listener :ref:`configuration overview <config_listeners>`

message UdpListenerConfig {
Expand Down
13 changes: 13 additions & 0 deletions docs/root/intro/arch_overview/intro/threading_model.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,16 @@ largely single threaded (embarrassingly parallel) with a small amount of more co
coordination between the worker threads. Generally Envoy is written to be 100% non-blocking and for
most workloads we recommend configuring the number of worker threads to be equal to the number of
hardware threads on the machine.

Listener connection balancing
-----------------------------

By default, there is no coordination between worker threads. This means that all worker threads
independently attempt to accept connections on each listener and rely on the kernel to perform
adequate balancing between threads. For most workloads, the kernel does a very good job of
balancing incoming connections. However, for some workloads, particularly those that have a small
number of very long lived connections (e.g., service mesh HTTP2/gRPC egress), it may be desirable
to have Envoy forcibly balance connections between worker threads. To support this behavior,
Envoy allows for different types of :ref:`connection balancing
<envoy_api_field_Listener.connection_balance_config>` to be configured on each :ref:`listener
<arch_overview_listeners>`.
2 changes: 2 additions & 0 deletions docs/root/intro/version_history.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ Version history
* http: absolute URL support is now on by default. The prior behavior can be reinstated by setting :ref:`allow_absolute_url <envoy_api_field_core.Http1ProtocolOptions.allow_absolute_url>` to false.
* listeners: added :ref:`continue_on_listener_filters_timeout <envoy_api_field_Listener.continue_on_listener_filters_timeout>` to configure whether a listener will still create a connection when listener filters time out.
* listeners: added :ref:`HTTP inspector listener filter <config_listener_filters_http_inspector>`.
* listeners: added :ref:`connection balancer <envoy_api_field_Listener.connection_balance_config>`
configuration for TCP listeners.
* lua: extended `httpCall()` and `respond()` APIs to accept headers with entry values that can be a string or table of strings.
* metrics_service: added support for flushing histogram buckets.
* outlier_detector: added :ref:`support for the grpc-status response header <arch_overview_outlier_detection_grpc>` by mapping it to HTTP status. Guarded by envoy.reloadable_features.outlier_detection_support_for_grpc_status which defaults to true.
Expand Down
7 changes: 2 additions & 5 deletions include/envoy/event/dispatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,13 +131,10 @@ class Dispatcher {
* @param socket supplies the socket to listen on.
* @param cb supplies the callbacks to invoke for listener events.
* @param bind_to_port controls whether the listener binds to a transport port or not.
* @param hand_off_restored_destination_connections controls whether the listener searches for
* another listener after restoring the destination address of a new connection.
* @return Network::ListenerPtr a new listener that is owned by the caller.
*/
virtual Network::ListenerPtr createListener(Network::Socket& socket,
Network::ListenerCallbacks& cb, bool bind_to_port,
bool hand_off_restored_destination_connections) PURE;
virtual Network::ListenerPtr
createListener(Network::Socket& socket, Network::ListenerCallbacks& cb, bool bind_to_port) PURE;

/**
* Creates a logical udp listener on a specific port.
Expand Down
9 changes: 9 additions & 0 deletions include/envoy/network/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,19 @@ envoy_cc_library(
],
)

envoy_cc_library(
name = "connection_balancer_interface",
hdrs = ["connection_balancer.h"],
deps = [
":listen_socket_interface",
],
)

envoy_cc_library(
name = "listener_interface",
hdrs = ["listener.h"],
deps = [
":connection_balancer_interface",
":connection_interface",
":listen_socket_interface",
"//include/envoy/stats:stats_interface",
Expand Down
70 changes: 70 additions & 0 deletions include/envoy/network/connection_balancer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#pragma once

#include "envoy/network/listen_socket.h"

namespace Envoy {
namespace Network {

/**
* A connection handler that is balanced. Typically implemented by individual listeners depending
* on their balancing configuration.
*/
class BalancedConnectionHandler {
public:
virtual ~BalancedConnectionHandler() = default;

/**
* @return the number of active connections within the handler.
*/
virtual uint64_t numConnections() const PURE;

/**
* Increment the number of connections within the handler. This must be called by a connection
* balancer implementation prior to a connection being rebalanced to another handler. This makes
* sure that connection counts are accurate during connection transfer (i.e., that the target
* balancer accounts for the incoming connection).
*/
virtual void incNumConnections() PURE;

/**
* Post a connected socket to this connection handler. This is used for cross-thread connection
* transfer during the balancing process.
*/
virtual void post(Network::ConnectionSocketPtr&& socket) PURE;
};

/**
* An implementation of a connection balancer. This abstracts the underlying policy (e.g., exact,
* fuzzy, etc.).
*/
class ConnectionBalancer {
public:
virtual ~ConnectionBalancer() = default;

/**
* Register a new handler with the balancer that is available for balancing.
*/
virtual void registerHandler(BalancedConnectionHandler& handler) PURE;

/**
* Unregister a handler with the balancer that is no longer available for balancing.
*/
virtual void unregisterHandler(BalancedConnectionHandler& handler) PURE;

/**
* Determine whether a connection should be rebalanced or not.
* @param socket supplies the socket that is eligible for balancing.
* @param current_handler supplies the currently executing connection handler.
* @return Rebalanced if the socket has been moved to another connection handler or Continue if
* the socket should be processed on the currently executing connection handler.
*/
enum class BalanceConnectionResult { Rebalanced, Continue };
virtual BalanceConnectionResult
balanceConnection(Network::ConnectionSocketPtr&& socket,
BalancedConnectionHandler& current_handler) PURE;
};

using ConnectionBalancerPtr = std::unique_ptr<ConnectionBalancer>;

} // namespace Network
} // namespace Envoy
8 changes: 0 additions & 8 deletions include/envoy/network/connection_handler.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,6 @@ class ConnectionHandler {
*/
virtual void addListener(ListenerConfig& config) PURE;

/**
* Find a listener based on the provided listener address value.
* @param address supplies the address value.
* @return a pointer to the listener or nullptr if not found.
* Ownership of the listener is NOT transferred
*/
virtual Network::Listener* findListenerByAddress(const Network::Address::Instance& address) PURE;

/**
* Remove listeners using the listener tag as a key. All connections owned by the removed
* listeners will be closed.
Expand Down
20 changes: 8 additions & 12 deletions include/envoy/network/listener.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
#include "envoy/api/io_error.h"
#include "envoy/common/exception.h"
#include "envoy/network/connection.h"
#include "envoy/network/connection_balancer.h"
#include "envoy/network/listen_socket.h"
#include "envoy/stats/scope.h"

namespace Envoy {
namespace Network {

class UdpListenerFilterManager;
class ActiveUdpListenerFactory;

/**
Expand Down Expand Up @@ -97,6 +97,12 @@ class ListenerConfig {
* nullptr.
*/
virtual const ActiveUdpListenerFactory* udpListenerFactory() PURE;

/**
* @return the connection balancer for this listener. All listeners have a connection balancer,
* though the implementation may be a NOP balancer.
*/
virtual ConnectionBalancer& connectionBalancer() PURE;
};

/**
Expand All @@ -109,18 +115,8 @@ class ListenerCallbacks {
/**
* Called when a new connection is accepted.
* @param socket supplies the socket that is moved into the callee.
* @param hand_off_restored_destination_connections is true when the socket was first accepted by
* another listener and is redirected to a new listener. The recipient should not redirect the
* socket any further.
*/
virtual void onAccept(ConnectionSocketPtr&& socket,
bool hand_off_restored_destination_connections = true) PURE;

/**
* Called when a new connection is accepted.
* @param new_connection supplies the new connection that is moved into the callee.
*/
virtual void onNewConnection(ConnectionPtr&& new_connection) PURE;
virtual void onAccept(ConnectionSocketPtr&& socket) PURE;
};

/**
Expand Down
9 changes: 4 additions & 5 deletions source/common/event/dispatcher_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -131,12 +131,11 @@ Filesystem::WatcherPtr DispatcherImpl::createFilesystemWatcher() {
return Filesystem::WatcherPtr{new Filesystem::WatcherImpl(*this)};
}

Network::ListenerPtr
DispatcherImpl::createListener(Network::Socket& socket, Network::ListenerCallbacks& cb,
bool bind_to_port, bool hand_off_restored_destination_connections) {
Network::ListenerPtr DispatcherImpl::createListener(Network::Socket& socket,
Network::ListenerCallbacks& cb,
bool bind_to_port) {
ASSERT(isThreadSafe());
return Network::ListenerPtr{new Network::ListenerImpl(*this, socket, cb, bind_to_port,
hand_off_restored_destination_connections)};
return std::make_unique<Network::ListenerImpl>(*this, socket, cb, bind_to_port);
}

Network::UdpListenerPtr DispatcherImpl::createUdpListener(Network::Socket& socket,
Expand Down
3 changes: 1 addition & 2 deletions source/common/event/dispatcher_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,7 @@ class DispatcherImpl : Logger::Loggable<Logger::Id::main>,
uint32_t events) override;
Filesystem::WatcherPtr createFilesystemWatcher() override;
Network::ListenerPtr createListener(Network::Socket& socket, Network::ListenerCallbacks& cb,
bool bind_to_port,
bool hand_off_restored_destination_connections) override;
bool bind_to_port) override;
Network::UdpListenerPtr createUdpListener(Network::Socket& socket,
Network::UdpListenerCallbacks& cb) override;
TimerPtr createTimer(TimerCb cb) override;
Expand Down
9 changes: 9 additions & 0 deletions source/common/network/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,15 @@ envoy_cc_library(
],
)

envoy_cc_library(
name = "connection_balancer_lib",
srcs = ["connection_balancer_impl.cc"],
hdrs = ["connection_balancer_impl.h"],
deps = [
"//include/envoy/network:connection_balancer_interface",
],
)

envoy_cc_library(
name = "connection_lib",
srcs = ["connection_impl.cc"],
Expand Down
Loading

0 comments on commit c1ca210

Please sign in to comment.