From f1a6056953bd990233c2f94e972d456c910bcfe9 Mon Sep 17 00:00:00 2001 From: Michael Bell Date: Sun, 29 Aug 2021 20:35:22 +0100 Subject: [PATCH] Immediately close bad connections to prevent file exhaustion osrm-routed does not immediately clean up a keep-alive connection when the client closes it. Instead it waits for five seconds of inactivity before removing it. Given a setup with low file limits and clients opening and closing a lot of keep-alive connections, it's possible for osrm-routed to run out of file descriptors whilst it waits for the clean-up to trigger. Furthermore, this causes the connection acceptor loop to exit. Even after the old connections are cleaned up, new ones will not be created. Any new requests will block until the server is restarted. This commit improves the situation by: - Immediately closing connections on error. This includes EOF errors indicating that the client has closed the connection. This releases resources early (including the open file) and doesn't wait for the timer. - Logging when the acceptor loop exits. Whilst this means the behaviour can still occur for reasons other than too many open files, we will at least have visibility of the cause and can investigate further. --- CHANGELOG.md | 1 + include/server/server.hpp | 4 ++++ src/server/connection.cpp | 16 +++++++++++++--- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1037c5109aa..0bcf36c0af3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ - Changes from 5.25.0 - API: - FIXED: Allow for special characters in the profile/method as part of the HTTP URL. 
[#6090](https://github.com/Project-OSRM/osrm-backend/pull/6090) + - FIXED: Set osrm-routed to immediately close bad connections [#6112](https://github.com/Project-OSRM/osrm-backend/pull/6112) - Build: - CHANGED: Replace Travis with Github Actions for CI builds [#6071](https://github.com/Project-OSRM/osrm-backend/pull/6071) - FIXED: Fixed Boost link flags in pkg-config file. [#6083](https://github.com/Project-OSRM/osrm-backend/pull/6083) diff --git a/include/server/server.hpp b/include/server/server.hpp index 53098fa61c5..06d2ec8e92b 100644 --- a/include/server/server.hpp +++ b/include/server/server.hpp @@ -101,6 +101,10 @@ class Server new_connection->socket(), boost::bind(&Server::HandleAccept, this, boost::asio::placeholders::error)); } + else + { + util::Log(logERROR) << "HandleAccept error: " << e.message(); + } } unsigned thread_pool_size; diff --git a/src/server/connection.cpp b/src/server/connection.cpp index d6441977f2c..31fed90e8e2 100644 --- a/src/server/connection.cpp +++ b/src/server/connection.cpp @@ -3,13 +3,10 @@ #include "server/request_parser.hpp" #include -#include #include #include #include -#include -#include #include namespace osrm @@ -48,6 +45,12 @@ void Connection::handle_read(const boost::system::error_code &error, std::size_t { if (error) { + if (error != boost::asio::error::operation_aborted) + { + // Error not triggered by timer expiry, commence connection shutdown. 
+ util::Log(logDEBUG) << "Connection read error: " << error.message(); + handle_shutdown(); + } return; } @@ -73,6 +76,7 @@ void Connection::handle_read(const boost::system::error_code &error, std::size_t current_request.endpoint = TCP_socket.remote_endpoint(ec).address(); if (ec) { + util::Log(logDEBUG) << "Socket remote endpoint error: " << ec.message(); handle_shutdown(); return; } @@ -165,6 +169,10 @@ void Connection::handle_write(const boost::system::error_code &error) handle_shutdown(); } } + else + { + util::Log(logDEBUG) << "Connection write error: " << error.message(); + } } /// Handle completion of a timeout timer.. @@ -183,6 +191,8 @@ void Connection::handle_timeout(boost::system::error_code ec) void Connection::handle_shutdown() { + // Cancel timer to ensure all resources are released immediately on shutdown. + timer.cancel(); // Initiate graceful connection closure. boost::system::error_code ignore_error; TCP_socket.shutdown(boost::asio::ip::tcp::socket::shutdown_both, ignore_error);