From a2f5b7a02cfe6a0c96584f3ae60e928955af5502 Mon Sep 17 00:00:00 2001 From: Michael Friedrich Date: Tue, 19 Feb 2019 14:05:00 +0100 Subject: [PATCH 01/20] Revert "CheckerComponent#Stop(): wait for all running checks" This reverts commit 1eaad0637b75bd24875b1e62fd080fb155b08ed7. --- lib/base/defer.hpp | 59 -------------------------------- lib/checker/checkercomponent.cpp | 12 ------- lib/checker/checkercomponent.hpp | 3 -- 3 files changed, 74 deletions(-) delete mode 100644 lib/base/defer.hpp diff --git a/lib/base/defer.hpp b/lib/base/defer.hpp deleted file mode 100644 index 05ae0b81996..00000000000 --- a/lib/base/defer.hpp +++ /dev/null @@ -1,59 +0,0 @@ -/****************************************************************************** - * Icinga 2 * - * Copyright (C) 2012-2018 Icinga Development Team (https://icinga.com/) * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of the GNU General Public License * - * as published by the Free Software Foundation; either version 2 * - * of the License, or (at your option) any later version. * - * * - * This program is distributed in the hope that it will be useful, * - * but WITHOUT ANY WARRANTY; without even the implied warranty of * - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * - * GNU General Public License for more details. * - * * - * You should have received a copy of the GNU General Public License * - * along with this program; if not, write to the Free Software Foundation * - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. * - ******************************************************************************/ - -#ifndef DEFER -#define DEFER - -#include -#include - -namespace icinga -{ - -/** - * An action to be executed at end of scope. 
- * - * @ingroup base - */ -class Defer -{ -public: - inline - Defer(std::function func) : m_Func(std::move(func)) - { - } - - Defer(const Defer&) = delete; - Defer(Defer&&) = delete; - Defer& operator=(const Defer&) = delete; - Defer& operator=(Defer&&) = delete; - - inline - ~Defer() - { - m_Func(); - } - -private: - std::function m_Func; -}; - -} - -#endif /* DEFER */ diff --git a/lib/checker/checkercomponent.cpp b/lib/checker/checkercomponent.cpp index 8878a9d292a..c14259b88b1 100644 --- a/lib/checker/checkercomponent.cpp +++ b/lib/checker/checkercomponent.cpp @@ -23,7 +23,6 @@ #include "icinga/cib.hpp" #include "remote/apilistener.hpp" #include "base/configtype.hpp" -#include "base/defer.hpp" #include "base/objectlock.hpp" #include "base/utility.hpp" #include "base/perfdatavalue.hpp" @@ -74,7 +73,6 @@ void CheckerComponent::Start(bool runtimeCreated) Log(LogInformation, "CheckerComponent") << "'" << GetName() << "' started."; - m_RunningChecks.store(0); m_Thread = std::thread(std::bind(&CheckerComponent::CheckThreadProc, this)); @@ -95,10 +93,6 @@ void CheckerComponent::Stop(bool runtimeRemoved) m_ResultTimer->Stop(); m_Thread.join(); - while (m_RunningChecks.load()) { - Utility::Sleep(1.0 / 60.0); - } - Log(LogInformation, "CheckerComponent") << "'" << GetName() << "' stopped."; @@ -213,8 +207,6 @@ void CheckerComponent::CheckThreadProc() Checkable::IncreasePendingChecks(); - m_RunningChecks.fetch_add(1); - Utility::QueueAsyncCallback(std::bind(&CheckerComponent::ExecuteCheckHelper, CheckerComponent::Ptr(this), checkable)); lock.lock(); @@ -223,10 +215,6 @@ void CheckerComponent::CheckThreadProc() void CheckerComponent::ExecuteCheckHelper(const Checkable::Ptr& checkable) { - Defer decrementRunningChecks ([this]{ - m_RunningChecks.fetch_sub(1); - }); - try { checkable->ExecuteCheck(); } catch (const std::exception& ex) { diff --git a/lib/checker/checkercomponent.hpp b/lib/checker/checkercomponent.hpp index 3c2753973ef..f69f5092e0b 100644 --- 
a/lib/checker/checkercomponent.hpp +++ b/lib/checker/checkercomponent.hpp @@ -25,13 +25,11 @@ #include "base/configobject.hpp" #include "base/timer.hpp" #include "base/utility.hpp" -#include #include #include #include #include #include -#include #include namespace icinga @@ -92,7 +90,6 @@ class CheckerComponent final : public ObjectImpl boost::condition_variable m_CV; bool m_Stopped{false}; std::thread m_Thread; - std::atomic m_RunningChecks; CheckableSet m_IdleCheckables; CheckableSet m_PendingCheckables; From d2e049d57506b6314e04ed16be99e2d94ad8b186 Mon Sep 17 00:00:00 2001 From: Michael Friedrich Date: Tue, 19 Feb 2019 14:05:07 +0100 Subject: [PATCH 02/20] Revert "CheckerComponent#Stop(): log after actually being stopped" This reverts commit b81aa6af3a23cba179b9da03fda6d2674ccc0956. --- lib/checker/checkercomponent.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/checker/checkercomponent.cpp b/lib/checker/checkercomponent.cpp index c14259b88b1..cde975b7251 100644 --- a/lib/checker/checkercomponent.cpp +++ b/lib/checker/checkercomponent.cpp @@ -84,6 +84,9 @@ void CheckerComponent::Start(bool runtimeCreated) void CheckerComponent::Stop(bool runtimeRemoved) { + Log(LogInformation, "CheckerComponent") + << "'" << GetName() << "' stopped."; + { boost::mutex::scoped_lock lock(m_Mutex); m_Stopped = true; @@ -93,9 +96,6 @@ void CheckerComponent::Stop(bool runtimeRemoved) m_ResultTimer->Stop(); m_Thread.join(); - Log(LogInformation, "CheckerComponent") - << "'" << GetName() << "' stopped."; - ObjectImpl::Stop(runtimeRemoved); } From 4edd3ece66a8345d34424068f2140ffaffb809e9 Mon Sep 17 00:00:00 2001 From: Michael Friedrich Date: Tue, 19 Feb 2019 14:06:15 +0100 Subject: [PATCH 03/20] Revert "Deactivate perfdata writers after checker" This reverts commit f0e12ff63ce138b6df9e85bc690eda46ae794ccb. 
--- lib/perfdata/elasticsearchwriter.ti | 2 +- lib/perfdata/gelfwriter.ti | 2 +- lib/perfdata/graphitewriter.ti | 2 +- lib/perfdata/influxdbwriter.ti | 2 +- lib/perfdata/opentsdbwriter.ti | 2 +- lib/perfdata/perfdatawriter.ti | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/perfdata/elasticsearchwriter.ti b/lib/perfdata/elasticsearchwriter.ti index c3e7fc04646..7cf604fb94a 100644 --- a/lib/perfdata/elasticsearchwriter.ti +++ b/lib/perfdata/elasticsearchwriter.ti @@ -7,7 +7,7 @@ namespace icinga class ElasticsearchWriter : ConfigObject { - activation_priority 80; + activation_priority 100; [config, required] String host { default {{{ return "127.0.0.1"; }}} diff --git a/lib/perfdata/gelfwriter.ti b/lib/perfdata/gelfwriter.ti index 92d4a217604..d13ee55d282 100644 --- a/lib/perfdata/gelfwriter.ti +++ b/lib/perfdata/gelfwriter.ti @@ -26,7 +26,7 @@ namespace icinga class GelfWriter : ConfigObject { - activation_priority 80; + activation_priority 100; [config] String host { default {{{ return "127.0.0.1"; }}} diff --git a/lib/perfdata/graphitewriter.ti b/lib/perfdata/graphitewriter.ti index 4475eb18423..b28ba87a954 100644 --- a/lib/perfdata/graphitewriter.ti +++ b/lib/perfdata/graphitewriter.ti @@ -26,7 +26,7 @@ namespace icinga class GraphiteWriter : ConfigObject { - activation_priority 80; + activation_priority 100; [config] String host { default {{{ return "127.0.0.1"; }}} diff --git a/lib/perfdata/influxdbwriter.ti b/lib/perfdata/influxdbwriter.ti index a26af6d556f..d2f1ba6d2c1 100644 --- a/lib/perfdata/influxdbwriter.ti +++ b/lib/perfdata/influxdbwriter.ti @@ -26,7 +26,7 @@ namespace icinga class InfluxdbWriter : ConfigObject { - activation_priority 80; + activation_priority 100; [config, required] String host { default {{{ return "127.0.0.1"; }}} diff --git a/lib/perfdata/opentsdbwriter.ti b/lib/perfdata/opentsdbwriter.ti index 09d5f3ede74..3418e1606cc 100644 --- a/lib/perfdata/opentsdbwriter.ti +++ b/lib/perfdata/opentsdbwriter.ti @@ -26,7 
+26,7 @@ namespace icinga class OpenTsdbWriter : ConfigObject { - activation_priority 80; + activation_priority 100; [config] String host { default {{{ return "127.0.0.1"; }}} diff --git a/lib/perfdata/perfdatawriter.ti b/lib/perfdata/perfdatawriter.ti index dbd303c75a4..d2ceb6899ba 100644 --- a/lib/perfdata/perfdatawriter.ti +++ b/lib/perfdata/perfdatawriter.ti @@ -27,7 +27,7 @@ namespace icinga class PerfdataWriter : ConfigObject { - activation_priority 80; + activation_priority 100; [config] String host_perfdata_path { default {{{ return Configuration::SpoolDir + "/perfdata/host-perfdata"; }}} From 5da1cbd0969ee1fa75a359937cc93976741fe694 Mon Sep 17 00:00:00 2001 From: Michael Friedrich Date: Tue, 19 Feb 2019 14:30:44 +0100 Subject: [PATCH 04/20] Revert "Respect activation priority also on deactivation" This reverts commit 8ad1717055683c573f81181c7749ba422fa96d13. --- lib/base/configobject.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/lib/base/configobject.cpp b/lib/base/configobject.cpp index f8517d61d21..b73285bc5dc 100644 --- a/lib/base/configobject.cpp +++ b/lib/base/configobject.cpp @@ -33,7 +33,6 @@ #include "base/workqueue.hpp" #include "base/context.hpp" #include "base/application.hpp" -#include #include #include #include @@ -618,13 +617,7 @@ void ConfigObject::RestoreObjects(const String& filename, int attributeTypes) void ConfigObject::StopObjects() { - auto types = Type::GetAllTypes(); - - std::sort(types.begin(), types.end(), [](const Type::Ptr& a, const Type::Ptr& b) { - return a->GetActivationPriority() > b->GetActivationPriority(); - }); - - for (const Type::Ptr& type : types) { + for (const Type::Ptr& type : Type::GetAllTypes()) { auto *dtype = dynamic_cast(type.get()); if (!dtype) From 7b2b45b503dd193d5eb3eba41353f33b5515ccab Mon Sep 17 00:00:00 2001 From: Michael Friedrich Date: Tue, 19 Feb 2019 14:54:28 +0100 Subject: [PATCH 05/20] Revert "InfluxdbWriter#Stop(): flush buffer" This reverts commit 
8470facfe2d8437d5db75e4f84434991d30032c7. --- lib/perfdata/influxdbwriter.cpp | 8 -------- lib/perfdata/influxdbwriter.hpp | 1 - 2 files changed, 9 deletions(-) diff --git a/lib/perfdata/influxdbwriter.cpp b/lib/perfdata/influxdbwriter.cpp index 80d05023364..91fb1be10ae 100644 --- a/lib/perfdata/influxdbwriter.cpp +++ b/lib/perfdata/influxdbwriter.cpp @@ -140,14 +140,6 @@ void InfluxdbWriter::Pause() ObjectImpl::Pause(); } -void InfluxdbWriter::Stop(bool runtimeDeleted) -{ - FlushTimeout(); - m_WorkQueue.Join(); - - ObjectImpl::Stop(runtimeDeleted); -} - void InfluxdbWriter::AssertOnWorkQueue() { ASSERT(m_WorkQueue.IsWorkerThread()); diff --git a/lib/perfdata/influxdbwriter.hpp b/lib/perfdata/influxdbwriter.hpp index 370ca25c5e1..face44f69a3 100644 --- a/lib/perfdata/influxdbwriter.hpp +++ b/lib/perfdata/influxdbwriter.hpp @@ -51,7 +51,6 @@ class InfluxdbWriter final : public ObjectImpl void OnConfigLoaded() override; void Resume() override; void Pause() override; - void Stop(bool runtimeDeleted) override; private: WorkQueue m_WorkQueue{10000000, 1}; From cc8dd42c0a5ad509f60d56b78a43b4039ca14e78 Mon Sep 17 00:00:00 2001 From: Michael Friedrich Date: Wed, 20 Feb 2019 12:11:59 +0100 Subject: [PATCH 06/20] Start the notification component after general features --- lib/notification/notificationcomponent.ti | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/notification/notificationcomponent.ti b/lib/notification/notificationcomponent.ti index 840667fb87f..67266cee002 100644 --- a/lib/notification/notificationcomponent.ti +++ b/lib/notification/notificationcomponent.ti @@ -26,7 +26,7 @@ namespace icinga class NotificationComponent : ConfigObject { - activation_priority 100; + activation_priority 200; [config] bool enable_ha (EnableHA) { default {{{ return true; }}} From 9d5dc4afc1bb66ea25f98dd12acded49e493fbbf Mon Sep 17 00:00:00 2001 From: Michael Friedrich Date: Wed, 20 Feb 2019 12:37:48 +0100 Subject: [PATCH 07/20] Start the checker component at 
the latest point possible --- lib/checker/checkercomponent.ti | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/checker/checkercomponent.ti b/lib/checker/checkercomponent.ti index 9663bb1896a..ee5a74ece77 100644 --- a/lib/checker/checkercomponent.ti +++ b/lib/checker/checkercomponent.ti @@ -26,7 +26,7 @@ namespace icinga class CheckerComponent : ConfigObject { - activation_priority 100; + activation_priority 300; [config, no_storage] int concurrent_checks { get {{{ From a25e2b1038c34d2c11c51a47986c9cf5870021e4 Mon Sep 17 00:00:00 2001 From: Michael Friedrich Date: Wed, 20 Feb 2019 12:38:57 +0100 Subject: [PATCH 08/20] Stop object in reversed activation priority order This stops the checker component first, then notifications, then features, then config objects, then the API feature and logger(s). Patch taken from @al2klimov --- lib/base/configobject.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/base/configobject.cpp b/lib/base/configobject.cpp index b73285bc5dc..149917a43fb 100644 --- a/lib/base/configobject.cpp +++ b/lib/base/configobject.cpp @@ -617,7 +617,15 @@ void ConfigObject::RestoreObjects(const String& filename, int attributeTypes) void ConfigObject::StopObjects() { - for (const Type::Ptr& type : Type::GetAllTypes()) { + std::vector types = Type::GetAllTypes(); + + std::sort(types.begin(), types.end(), [](const Type::Ptr& a, const Type::Ptr& b) { + if (a->GetActivationPriority() > b->GetActivationPriority()) + return true; + return false; + }); + + for (const Type::Ptr& type : types) { auto *dtype = dynamic_cast(type.get()); if (!dtype) From a4f97c1246ffac508ff5d3735b4cf27b260c5190 Mon Sep 17 00:00:00 2001 From: Michael Friedrich Date: Wed, 20 Feb 2019 13:55:34 +0100 Subject: [PATCH 09/20] Stop the IcingaApplication at the latest point, before any loggers --- lib/icinga/icingaapplication.ti | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/icinga/icingaapplication.ti 
b/lib/icinga/icingaapplication.ti index dd4e060b11f..b3f90ff0213 100644 --- a/lib/icinga/icingaapplication.ti +++ b/lib/icinga/icingaapplication.ti @@ -26,6 +26,8 @@ namespace icinga class IcingaApplication : Application { + activation_priority -50; + [config, no_storage, virtual] String environment { get; set; From 7936a147ba82ce7e19de43ccb5f4c830edd8d235 Mon Sep 17 00:00:00 2001 From: Michael Friedrich Date: Wed, 20 Feb 2019 13:56:15 +0100 Subject: [PATCH 10/20] Activate downtimes before any checkable object --- lib/icinga/downtime.ti | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/icinga/downtime.ti b/lib/icinga/downtime.ti index fe80e0e7745..d2880421f7b 100644 --- a/lib/icinga/downtime.ti +++ b/lib/icinga/downtime.ti @@ -37,6 +37,8 @@ public: class Downtime : ConfigObject < DowntimeNameComposer { + activation_priority -10; + load_after Host; load_after Service; From 388999a86fc82cf9e8ac2f13f9a5aa52bbc3b72f Mon Sep 17 00:00:00 2001 From: Michael Friedrich Date: Wed, 20 Feb 2019 14:20:53 +0100 Subject: [PATCH 11/20] PerfdataWriter: Rotate and close files on Pause/Shutdown/Reload Refactored the code into a local mutex and added some more debug logging while at it. --- lib/perfdata/perfdatawriter.cpp | 31 +++++++++++++++++++++++++++---- lib/perfdata/perfdatawriter.hpp | 10 ++++++---- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/lib/perfdata/perfdatawriter.cpp b/lib/perfdata/perfdatawriter.cpp index c560ef256d1..7330f3c98b9 100644 --- a/lib/perfdata/perfdatawriter.cpp +++ b/lib/perfdata/perfdatawriter.cpp @@ -83,6 +83,14 @@ void PerfdataWriter::Resume() void PerfdataWriter::Pause() { +#ifdef I2_DEBUG + //m_HostOutputFile << "\n# Pause the feature" << "\n\n"; + //m_ServiceOutputFile << "\n# Pause the feature" << "\n\n"; +#endif /* I2_DEBUG */ + + /* Force a rotation closing the file stream. 
*/ + RotateAllFiles(); + Log(LogInformation, "PerfdataWriter") << "'" << GetName() << "' paused."; @@ -125,7 +133,8 @@ void PerfdataWriter::CheckResultHandler(const Checkable::Ptr& checkable, const C String line = MacroProcessor::ResolveMacros(GetServiceFormatTemplate(), resolvers, cr, nullptr, &PerfdataWriter::EscapeMacroMetric); { - ObjectLock olock(this); + boost::mutex::scoped_lock lock(m_StreamMutex); + if (!m_ServiceOutputFile.good()) return; @@ -135,7 +144,8 @@ void PerfdataWriter::CheckResultHandler(const Checkable::Ptr& checkable, const C String line = MacroProcessor::ResolveMacros(GetHostFormatTemplate(), resolvers, cr, nullptr, &PerfdataWriter::EscapeMacroMetric); { - ObjectLock olock(this); + boost::mutex::scoped_lock lock(m_StreamMutex); + if (!m_HostOutputFile.good()) return; @@ -146,13 +156,20 @@ void PerfdataWriter::CheckResultHandler(const Checkable::Ptr& checkable, const C void PerfdataWriter::RotateFile(std::ofstream& output, const String& temp_path, const String& perfdata_path) { - ObjectLock olock(this); + Log(LogDebug, "PerfdataWriter") + << "Rotating perfdata files."; + + boost::mutex::scoped_lock lock(m_StreamMutex); if (output.good()) { output.close(); if (Utility::PathExists(temp_path)) { String finalFile = perfdata_path + "." + Convert::ToString((long)Utility::GetTime()); + + Log(LogDebug, "PerfdataWriter") + << "Closed output file and renaming into '" << finalFile << "'."; + if (rename(temp_path.CStr(), finalFile.CStr()) < 0) { BOOST_THROW_EXCEPTION(posix_error() << boost::errinfo_api_function("rename") @@ -164,9 +181,10 @@ void PerfdataWriter::RotateFile(std::ofstream& output, const String& temp_path, output.open(temp_path.CStr()); - if (!output.good()) + if (!output.good()) { Log(LogWarning, "PerfdataWriter") << "Could not open perfdata file '" << temp_path << "' for writing. 
Perfdata will be lost."; + } } void PerfdataWriter::RotationTimerHandler() @@ -174,6 +192,11 @@ void PerfdataWriter::RotationTimerHandler() if (IsPaused()) return; + RotateAllFiles(); +} + +void PerfdataWriter::RotateAllFiles() +{ RotateFile(m_ServiceOutputFile, GetServiceTempPath(), GetServicePerfdataPath()); RotateFile(m_HostOutputFile, GetHostTempPath(), GetHostPerfdataPath()); } diff --git a/lib/perfdata/perfdatawriter.hpp b/lib/perfdata/perfdatawriter.hpp index 56d01d382e2..e7ea6aade8d 100644 --- a/lib/perfdata/perfdatawriter.hpp +++ b/lib/perfdata/perfdatawriter.hpp @@ -51,14 +51,16 @@ class PerfdataWriter final : public ObjectImpl void Pause() override; private: + Timer::Ptr m_RotationTimer; + std::ofstream m_ServiceOutputFile; + std::ofstream m_HostOutputFile; + boost::mutex m_StreamMutex; + void CheckResultHandler(const Checkable::Ptr& checkable, const CheckResult::Ptr& cr); static Value EscapeMacroMetric(const Value& value); - Timer::Ptr m_RotationTimer; void RotationTimerHandler(); - - std::ofstream m_ServiceOutputFile; - std::ofstream m_HostOutputFile; + void RotateAllFiles(); void RotateFile(std::ofstream& output, const String& temp_path, const String& perfdata_path); }; From d79390083dceb69575a4267cc8363608ca8717d6 Mon Sep 17 00:00:00 2001 From: Michael Friedrich Date: Wed, 20 Feb 2019 14:54:23 +0100 Subject: [PATCH 12/20] GraphiteWriter: Improve Pause/Shutdown/Reload handling for metrics --- lib/perfdata/graphitewriter.cpp | 37 ++++++++++++++++++++++++++++++--- lib/perfdata/graphitewriter.hpp | 2 ++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/lib/perfdata/graphitewriter.cpp b/lib/perfdata/graphitewriter.cpp index 1b6461d5137..5fa4479e673 100644 --- a/lib/perfdata/graphitewriter.cpp +++ b/lib/perfdata/graphitewriter.cpp @@ -102,12 +102,26 @@ void GraphiteWriter::Resume() Checkable::OnNewCheckResult.connect(std::bind(&GraphiteWriter::CheckResultHandler, this, _1, _2)); } +/* Pause is equivalent to Stop, but with HA capabilities to 
resume at runtime. */ void GraphiteWriter::Pause() { - Log(LogInformation, "GraphiteWriter") - << "'" << GetName() << "' paused."; + m_ReconnectTimer.reset(); + + try { + ReconnectInternal(); + } catch (const std::exception&) { + Log(LogInformation, "GraphiteWriter") + << "'" << GetName() << "' paused. Unable to connect, not flushing buffers. Data may be lost on reload."; + + ObjectImpl::Pause(); + return; + } m_WorkQueue.Join(); + DisconnectInternal(); + + Log(LogInformation, "GraphiteWriter") + << "'" << GetName() << "' paused."; ObjectImpl::Pause(); } @@ -140,6 +154,11 @@ void GraphiteWriter::Reconnect() return; } + ReconnectInternal(); +} + +void GraphiteWriter::ReconnectInternal() +{ double startTime = Utility::GetTime(); CONTEXT("Reconnecting to Graphite '" + GetName() + "'"); @@ -172,6 +191,9 @@ void GraphiteWriter::Reconnect() void GraphiteWriter::ReconnectTimerHandler() { + if (IsPaused()) + return; + m_WorkQueue.Enqueue(std::bind(&GraphiteWriter::Reconnect, this), PriorityNormal); } @@ -179,6 +201,11 @@ void GraphiteWriter::Disconnect() { AssertOnWorkQueue(); + DisconnectInternal(); +} + +void GraphiteWriter::DisconnectInternal() +{ if (!GetConnected()) return; @@ -201,6 +228,10 @@ void GraphiteWriter::CheckResultHandlerInternal(const Checkable::Ptr& checkable, CONTEXT("Processing check result for '" + checkable->GetName() + "'"); + /* TODO: Deal with missing connection here. Needs refactoring + * into parsing the actual performance data and then putting it + * into a queue for re-inserting. */ + if (!IcingaApplication::GetInstance()->GetEnablePerfdata() || !checkable->GetEnablePerfdata()) return; @@ -293,7 +324,7 @@ void GraphiteWriter::SendMetric(const String& prefix, const String& name, double msgbuf << prefix << "." 
<< name << " " << Convert::ToString(value) << " " << static_cast(ts); Log(LogDebug, "GraphiteWriter") - << "Add to metric list:'" << msgbuf.str() << "'."; + << "Add to metric list: '" << msgbuf.str() << "'."; // do not send \n to debug log msgbuf << "\n"; diff --git a/lib/perfdata/graphitewriter.hpp b/lib/perfdata/graphitewriter.hpp index ec4c5bbfe0f..2a4443e96a8 100644 --- a/lib/perfdata/graphitewriter.hpp +++ b/lib/perfdata/graphitewriter.hpp @@ -71,7 +71,9 @@ class GraphiteWriter final : public ObjectImpl void ReconnectTimerHandler(); void Disconnect(); + void DisconnectInternal(); void Reconnect(); + void ReconnectInternal(); void AssertOnWorkQueue(); From d4bdebc79505f999ec9de2bc3d5a0fea012de976 Mon Sep 17 00:00:00 2001 From: Michael Friedrich Date: Wed, 20 Feb 2019 15:21:24 +0100 Subject: [PATCH 13/20] InfluxdbWriter: Ensure to flush buffers after emptying the metrics queue on Pause/Shutdown/Reload Patch taken from @al2klimov but moved into Pause() --- lib/perfdata/influxdbwriter.cpp | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/lib/perfdata/influxdbwriter.cpp b/lib/perfdata/influxdbwriter.cpp index 91fb1be10ae..13b15d30b50 100644 --- a/lib/perfdata/influxdbwriter.cpp +++ b/lib/perfdata/influxdbwriter.cpp @@ -130,13 +130,30 @@ void InfluxdbWriter::Resume() Checkable::OnNewCheckResult.connect(std::bind(&InfluxdbWriter::CheckResultHandler, this, _1, _2)); } +/* Pause is equivalent to Stop, but with HA capabilities to resume at runtime. */ void InfluxdbWriter::Pause() { - Log(LogInformation, "InfluxdbWriter") - << "'" << GetName() << "' paused."; + /* Force a flush. */ + Log(LogDebug, "InfluxdbWriter") + << "Flushing pending data buffers."; + + Flush(); + + /* Work on the missing tasks. TODO: Find a way to cache them on disk. */ + Log(LogDebug, "InfluxdbWriter") + << "Joining existing WQ tasks."; m_WorkQueue.Join(); + /* Flush again after the WQ tasks have filled the data buffer. 
*/ + Log(LogDebug, "InfluxdbWriter") + << "Flushing data buffers from WQ tasks."; + + Flush(); + + Log(LogInformation, "InfluxdbWriter") + << "'" << GetName() << "' paused."; + ObjectImpl::Pause(); } @@ -428,6 +445,9 @@ void InfluxdbWriter::FlushTimeoutWQ() void InfluxdbWriter::Flush() { + Log(LogDebug, "InfluxdbWriter") + << "Flushing data buffer to InfluxDB."; + String body = boost::algorithm::join(m_DataBuffer, "\n"); m_DataBuffer.clear(); From 1bfe756cb14bd23d53f1e39f76e53e3f302428d7 Mon Sep 17 00:00:00 2001 From: Michael Friedrich Date: Wed, 20 Feb 2019 17:10:46 +0100 Subject: [PATCH 14/20] DB IDO: Improve queue handling on Pause/Shutdown/Reload --- lib/db_ido/servicedbobject.cpp | 15 --------------- lib/db_ido_mysql/idomysqlconnection.cpp | 19 ++++++++++--------- lib/db_ido_pgsql/idopgsqlconnection.cpp | 19 ++++++++++--------- 3 files changed, 20 insertions(+), 33 deletions(-) diff --git a/lib/db_ido/servicedbobject.cpp b/lib/db_ido/servicedbobject.cpp index 903bba65594..fcc294cbc32 100644 --- a/lib/db_ido/servicedbobject.cpp +++ b/lib/db_ido/servicedbobject.cpp @@ -200,9 +200,6 @@ void ServiceDbObject::OnConfigUpdateHeavy() DbObject::OnMultipleQueries(queries); /* service dependencies */ - Log(LogDebug, "ServiceDbObject") - << "service dependencies for '" << service->GetName() << "'"; - queries.clear(); DbQuery query2; @@ -250,9 +247,6 @@ void ServiceDbObject::OnConfigUpdateHeavy() DbObject::OnMultipleQueries(queries); /* service contacts, contactgroups */ - Log(LogDebug, "ServiceDbObject") - << "service contacts: " << service->GetName(); - queries.clear(); DbQuery query3; @@ -265,9 +259,6 @@ void ServiceDbObject::OnConfigUpdateHeavy() queries.emplace_back(std::move(query3)); for (const User::Ptr& user : CompatUtility::GetCheckableNotificationUsers(service)) { - Log(LogDebug, "ServiceDbObject") - << "service contacts: " << user->GetName(); - DbQuery query_contact; query_contact.Table = GetType()->GetTable() + "_contacts"; query_contact.Type = DbQueryInsert; 
@@ -283,9 +274,6 @@ void ServiceDbObject::OnConfigUpdateHeavy() DbObject::OnMultipleQueries(queries); - Log(LogDebug, "ServiceDbObject") - << "service contactgroups: " << service->GetName(); - queries.clear(); DbQuery query4; @@ -298,9 +286,6 @@ void ServiceDbObject::OnConfigUpdateHeavy() queries.emplace_back(std::move(query4)); for (const UserGroup::Ptr& usergroup : CompatUtility::GetCheckableNotificationUserGroups(service)) { - Log(LogDebug, "ServiceDbObject") - << "service contactgroups: " << usergroup->GetName(); - DbQuery query_contact; query_contact.Table = GetType()->GetTable() + "_contactgroups"; query_contact.Type = DbQueryInsert; diff --git a/lib/db_ido_mysql/idomysqlconnection.cpp b/lib/db_ido_mysql/idomysqlconnection.cpp index 9bbbeb0df42..a6323332c74 100644 --- a/lib/db_ido_mysql/idomysqlconnection.cpp +++ b/lib/db_ido_mysql/idomysqlconnection.cpp @@ -107,9 +107,6 @@ void IdoMysqlConnection::Resume() void IdoMysqlConnection::Pause() { - Log(LogInformation, "IdoMysqlConnection") - << "'" << GetName() << "' paused."; - m_ReconnectTimer.reset(); DbConnection::Pause(); @@ -119,8 +116,12 @@ void IdoMysqlConnection::Pause() << "Rescheduling disconnect task."; #endif /* I2_DEBUG */ - m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::Disconnect, this), PriorityHigh); + m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::Disconnect, this), PriorityLow); m_QueryQueue.Join(); + + Log(LogInformation, "IdoMysqlConnection") + << "'" << GetName() << "' paused."; + } void IdoMysqlConnection::ExceptionHandler(boost::exception_ptr exp) @@ -192,7 +193,7 @@ void IdoMysqlConnection::ReconnectTimerHandler() << "Scheduling reconnect task."; #endif /* I2_DEBUG */ - m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::Reconnect, this), PriorityLow); + m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::Reconnect, this), PriorityHigh); } void IdoMysqlConnection::Reconnect() @@ -451,9 +452,9 @@ void IdoMysqlConnection::Reconnect() << "Scheduling session table clear and finish 
connect task."; #endif /* I2_DEBUG */ - m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::ClearTablesBySession, this), PriorityLow); + m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::ClearTablesBySession, this), PriorityHigh); - m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::FinishConnect, this, startTime), PriorityLow); + m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::FinishConnect, this, startTime), PriorityHigh); } void IdoMysqlConnection::FinishConnect(double startTime) @@ -726,7 +727,7 @@ void IdoMysqlConnection::ActivateObject(const DbObject::Ptr& dbobj) << "Scheduling object activation task for '" << dbobj->GetName1() << "!" << dbobj->GetName2() << "'."; #endif /* I2_DEBUG */ - m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::InternalActivateObject, this, dbobj), PriorityLow); + m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::InternalActivateObject, this, dbobj), PriorityHigh); } void IdoMysqlConnection::InternalActivateObject(const DbObject::Ptr& dbobj) @@ -771,7 +772,7 @@ void IdoMysqlConnection::DeactivateObject(const DbObject::Ptr& dbobj) << "Scheduling object deactivation task for '" << dbobj->GetName1() << "!" 
<< dbobj->GetName2() << "'."; #endif /* I2_DEBUG */ - m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::InternalDeactivateObject, this, dbobj), PriorityLow); + m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::InternalDeactivateObject, this, dbobj), PriorityHigh); } void IdoMysqlConnection::InternalDeactivateObject(const DbObject::Ptr& dbobj) diff --git a/lib/db_ido_pgsql/idopgsqlconnection.cpp b/lib/db_ido_pgsql/idopgsqlconnection.cpp index 4724ab70717..bc791fbd6c1 100644 --- a/lib/db_ido_pgsql/idopgsqlconnection.cpp +++ b/lib/db_ido_pgsql/idopgsqlconnection.cpp @@ -114,15 +114,16 @@ void IdoPgsqlConnection::Resume() void IdoPgsqlConnection::Pause() { - Log(LogInformation, "IdoPgsqlConnection") - << "'" << GetName() << "' paused."; - m_ReconnectTimer.reset(); DbConnection::Pause(); - m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::Disconnect, this), PriorityHigh); + m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::Disconnect, this), PriorityLow); m_QueryQueue.Join(); + + Log(LogInformation, "IdoPgsqlConnection") + << "'" << GetName() << "' paused."; + } void IdoPgsqlConnection::ExceptionHandler(boost::exception_ptr exp) @@ -182,7 +183,7 @@ void IdoPgsqlConnection::InternalNewTransaction() void IdoPgsqlConnection::ReconnectTimerHandler() { - m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::Reconnect, this), PriorityLow); + m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::Reconnect, this), PriorityHigh); } void IdoPgsqlConnection::Reconnect() @@ -425,9 +426,9 @@ void IdoPgsqlConnection::Reconnect() UpdateAllObjects(); - m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::ClearTablesBySession, this), PriorityLow); + m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::ClearTablesBySession, this), PriorityHigh); - m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::FinishConnect, this, startTime), PriorityLow); + m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::FinishConnect, this, startTime), PriorityHigh); } void 
IdoPgsqlConnection::FinishConnect(double startTime) @@ -575,7 +576,7 @@ void IdoPgsqlConnection::ActivateObject(const DbObject::Ptr& dbobj) if (IsPaused()) return; - m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::InternalActivateObject, this, dbobj), PriorityLow); + m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::InternalActivateObject, this, dbobj), PriorityHigh); } void IdoPgsqlConnection::InternalActivateObject(const DbObject::Ptr& dbobj) @@ -612,7 +613,7 @@ void IdoPgsqlConnection::DeactivateObject(const DbObject::Ptr& dbobj) if (IsPaused()) return; - m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::InternalDeactivateObject, this, dbobj), PriorityLow); + m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::InternalDeactivateObject, this, dbobj), PriorityHigh); } void IdoPgsqlConnection::InternalDeactivateObject(const DbObject::Ptr& dbobj) From 301c48e9bc8e5c37e4b091bdf72b6deafa29eb99 Mon Sep 17 00:00:00 2001 From: Michael Friedrich Date: Wed, 20 Feb 2019 17:11:39 +0100 Subject: [PATCH 15/20] ElasticWriter: Ensure to flush buffers on Pause/Shutdown/Reload --- lib/perfdata/elasticsearchwriter.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/perfdata/elasticsearchwriter.cpp b/lib/perfdata/elasticsearchwriter.cpp index ec89bed01f2..ac2005d7a5d 100644 --- a/lib/perfdata/elasticsearchwriter.cpp +++ b/lib/perfdata/elasticsearchwriter.cpp @@ -104,13 +104,16 @@ void ElasticsearchWriter::Resume() Checkable::OnNotificationSentToAllUsers.connect(std::bind(&ElasticsearchWriter::NotificationSentToAllUsersHandler, this, _1, _2, _3, _4, _5, _6, _7)); } +/* Pause is equivalent to Stop, but with HA capabilities to resume at runtime. 
*/ void ElasticsearchWriter::Pause() { + Flush(); + m_WorkQueue.Join(); + Flush(); + Log(LogInformation, "ElasticsearchWriter") << "'" << GetName() << "' paused."; - m_WorkQueue.Join(); - ObjectImpl::Pause(); } From 24c3572b034bbb387c9654cef48ab2695ddcb65c Mon Sep 17 00:00:00 2001 From: Michael Friedrich Date: Wed, 20 Feb 2019 17:17:45 +0100 Subject: [PATCH 16/20] OpenTsdbWriter: Ensure to reset reconnect timer on Pause() --- lib/perfdata/opentsdbwriter.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/perfdata/opentsdbwriter.cpp b/lib/perfdata/opentsdbwriter.cpp index 78feea8f26f..6999482caee 100644 --- a/lib/perfdata/opentsdbwriter.cpp +++ b/lib/perfdata/opentsdbwriter.cpp @@ -85,8 +85,11 @@ void OpenTsdbWriter::Resume() Service::OnNewCheckResult.connect(std::bind(&OpenTsdbWriter::CheckResultHandler, this, _1, _2)); } +/* Pause is equivalent to Stop, but with HA capabilities to resume at runtime. */ void OpenTsdbWriter::Pause() { + m_ReconnectTimer.reset(); + Log(LogInformation, "OpentsdbWriter") << "'" << GetName() << "' paused."; From 68e7027c937a0b91e5d75f95971c9b0423f72603 Mon Sep 17 00:00:00 2001 From: Michael Friedrich Date: Wed, 20 Feb 2019 17:18:24 +0100 Subject: [PATCH 17/20] GelfWriter: Ensure to join queue and execute queries on Pause/Shutdown/Reload --- lib/perfdata/gelfwriter.cpp | 28 ++++++++++++++++++++++++++-- lib/perfdata/gelfwriter.hpp | 2 ++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/lib/perfdata/gelfwriter.cpp b/lib/perfdata/gelfwriter.cpp index c4c0912d249..ea49231a3f2 100644 --- a/lib/perfdata/gelfwriter.cpp +++ b/lib/perfdata/gelfwriter.cpp @@ -107,12 +107,26 @@ void GelfWriter::Resume() Checkable::OnStateChange.connect(std::bind(&GelfWriter::StateChangeHandler, this, _1, _2, _3)); } +/* Pause is equivalent to Stop, but with HA capabilities to resume at runtime. 
*/ void GelfWriter::Pause() { - Log(LogInformation, "GelfWriter") - << "'" << GetName() << "' paused."; + m_ReconnectTimer.reset(); + + try { + ReconnectInternal(); + } catch (const std::exception&) { + Log(LogInformation, "GelfWriter") + << "'" << GetName() << "' paused. Unable to connect, not flushing buffers. Data may be lost on reload."; + + ObjectImpl::Pause(); + return; + } m_WorkQueue.Join(); + DisconnectInternal(); + + Log(LogInformation, "GraphiteWriter") + << "'" << GetName() << "' paused."; ObjectImpl::Pause(); } @@ -145,6 +159,11 @@ void GelfWriter::Reconnect() return; } + ReconnectInternal(); +} + +void GelfWriter::ReconnectInternal() +{ double startTime = Utility::GetTime(); CONTEXT("Reconnecting to Graylog Gelf '" + GetName() + "'"); @@ -184,6 +203,11 @@ void GelfWriter::Disconnect() { AssertOnWorkQueue(); + DisconnectInternal(); +} + +void GelfWriter::DisconnectInternal() +{ if (!GetConnected()) return; diff --git a/lib/perfdata/gelfwriter.hpp b/lib/perfdata/gelfwriter.hpp index 32e35da57d4..edff50997d3 100644 --- a/lib/perfdata/gelfwriter.hpp +++ b/lib/perfdata/gelfwriter.hpp @@ -72,7 +72,9 @@ class GelfWriter final : public ObjectImpl void ReconnectTimerHandler(); void Disconnect(); + void DisconnectInternal(); void Reconnect(); + void ReconnectInternal(); void AssertOnWorkQueue(); From 53acb8f2db84df7efffd25f3694c55cd66844b70 Mon Sep 17 00:00:00 2001 From: Michael Friedrich Date: Fri, 22 Feb 2019 09:34:16 +0100 Subject: [PATCH 18/20] PerfdataWriter: Reset the rotation timer on Pause() --- lib/perfdata/perfdatawriter.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/perfdata/perfdatawriter.cpp b/lib/perfdata/perfdatawriter.cpp index 7330f3c98b9..11d7b5f1191 100644 --- a/lib/perfdata/perfdatawriter.cpp +++ b/lib/perfdata/perfdatawriter.cpp @@ -83,6 +83,8 @@ void PerfdataWriter::Resume() void PerfdataWriter::Pause() { + m_RotationTimer.reset(); + #ifdef I2_DEBUG //m_HostOutputFile << "\n# Pause the feature" << "\n\n"; //m_ServiceOutputFile 
<< "\n# Pause the feature" << "\n\n"; From 89634c2eb65daef9297179ca0c968a236409c30a Mon Sep 17 00:00:00 2001 From: Michael Friedrich Date: Fri, 22 Feb 2019 15:27:34 +0100 Subject: [PATCH 19/20] Re-add Defer functionality --- lib/base/defer.hpp | 59 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 lib/base/defer.hpp diff --git a/lib/base/defer.hpp b/lib/base/defer.hpp new file mode 100644 index 00000000000..05ae0b81996 --- /dev/null +++ b/lib/base/defer.hpp @@ -0,0 +1,59 @@ +/****************************************************************************** + * Icinga 2 * + * Copyright (C) 2012-2018 Icinga Development Team (https://icinga.com/) * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of the GNU General Public License * + * as published by the Free Software Foundation; either version 2 * + * of the License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the Free Software Foundation * + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. * + ******************************************************************************/ + +#ifndef DEFER +#define DEFER + +#include +#include + +namespace icinga +{ + +/** + * An action to be executed at end of scope. 
+ * + * @ingroup base + */ +class Defer +{ +public: + inline + Defer(std::function func) : m_Func(std::move(func)) + { + } + + Defer(const Defer&) = delete; + Defer(Defer&&) = delete; + Defer& operator=(const Defer&) = delete; + Defer& operator=(Defer&&) = delete; + + inline + ~Defer() + { + m_Func(); + } + +private: + std::function m_Func; +}; + +} + +#endif /* DEFER */ From ab7a799369b73ab1f46f35169e0110e2479cd567 Mon Sep 17 00:00:00 2001 From: Michael Friedrich Date: Fri, 22 Feb 2019 15:53:38 +0100 Subject: [PATCH 20/20] Implement ReloadTimeout constant and wait for enqueued checks on Stop() --- doc/17-language-reference.md | 1 + lib/base/application.cpp | 17 ++++++++++++++--- lib/base/configuration.cpp | 11 +++++++++++ lib/base/configuration.hpp | 4 ++++ lib/base/configuration.ti | 5 +++++ lib/checker/checkercomponent.cpp | 29 ++++++++++++++++++++++++++--- lib/remote/configpackageutility.cpp | 2 +- 7 files changed, 62 insertions(+), 7 deletions(-) diff --git a/doc/17-language-reference.md b/doc/17-language-reference.md index 039e3ad319b..775684a1a83 100644 --- a/doc/17-language-reference.md +++ b/doc/17-language-reference.md @@ -440,6 +440,7 @@ Constant | Description --------------------|------------------- Vars |**Read-write.** Contains a dictionary with global custom attributes. Not set by default. NodeName |**Read-write.** Contains the cluster node name. Set to the local hostname by default. +ReloadTimeout |**Read-write.** Defines the reload timeout for child processes. Defaults to `300s`. Environment |**Read-write.** The name of the Icinga environment. Included in the SNI host name for outbound connections. Not set by default. RunAsUser |**Read-write.** Defines the user the Icinga 2 daemon is running as. Set in the Icinga 2 sysconfig. RunAsGroup |**Read-write.** Defines the group the Icinga 2 daemon is running as. Set in the Icinga 2 sysconfig. 
diff --git a/lib/base/application.cpp b/lib/base/application.cpp index df93710b710..62ed10cc5f2 100644 --- a/lib/base/application.cpp +++ b/lib/base/application.cpp @@ -77,6 +77,14 @@ void Application::OnConfigLoaded() ASSERT(m_Instance == nullptr); m_Instance = this; + + String reloadTimeout; + + if (ScriptGlobal::Exists("ReloadTimeout")) + reloadTimeout = ScriptGlobal::Get("ReloadTimeout"); + + if (!reloadTimeout.IsEmpty()) + Configuration::ReloadTimeout = Convert::ToDouble(reloadTimeout); } /** @@ -401,8 +409,6 @@ static void ReloadProcessCallback(const ProcessResult& pr) pid_t Application::StartReloadProcess() { - Log(LogInformation, "Application", "Got reload command: Starting new instance."); - // prepare arguments ArrayData args; args.push_back(GetExePath(m_ArgV[0])); @@ -422,9 +428,14 @@ pid_t Application::StartReloadProcess() #endif /* _WIN32 */ Process::Ptr process = new Process(Process::PrepareCommand(new Array(std::move(args)))); - process->SetTimeout(300); + process->SetTimeout(Configuration::ReloadTimeout); process->Run(&ReloadProcessCallback); + Log(LogInformation, "Application") + << "Got reload command: Started new instance with PID '" + << (unsigned long)(process->GetPID()) << "' (timeout is " + << Configuration::ReloadTimeout << "s)."; + return process->GetPID(); } diff --git a/lib/base/configuration.cpp b/lib/base/configuration.cpp index 0103028de73..c22b2f46b40 100644 --- a/lib/base/configuration.cpp +++ b/lib/base/configuration.cpp @@ -42,6 +42,7 @@ String Configuration::PidPath; String Configuration::PkgDataDir; String Configuration::PrefixDir; String Configuration::ProgramData; +double Configuration::ReloadTimeout{300}; int Configuration::RLimitFiles; int Configuration::RLimitProcesses; int Configuration::RLimitStack; @@ -240,6 +241,16 @@ void Configuration::SetProgramData(const String& val, bool suppress_events, cons HandleUserWrite("ProgramData", &Configuration::ProgramData, val, m_ReadOnly); } +double Configuration::GetReloadTimeout() 
const +{ + return Configuration::ReloadTimeout; +} + +void Configuration::SetReloadTimeout(double val, bool suppress_events, const Value& cookie) +{ + HandleUserWrite("ReloadTimeout", &Configuration::ReloadTimeout, val, m_ReadOnly); +} + int Configuration::GetRLimitFiles() const { return Configuration::RLimitFiles; diff --git a/lib/base/configuration.hpp b/lib/base/configuration.hpp index 96ab2726f53..694a02a8c08 100644 --- a/lib/base/configuration.hpp +++ b/lib/base/configuration.hpp @@ -87,6 +87,9 @@ class Configuration : public ObjectImpl String GetProgramData() const override; void SetProgramData(const String& value, bool suppress_events = false, const Value& cookie = Empty) override; + double GetReloadTimeout() const override; + void SetReloadTimeout(double value, bool suppress_events = false, const Value& cookie = Empty) override; + int GetRLimitFiles() const override; void SetRLimitFiles(int value, bool suppress_events = false, const Value& cookie = Empty) override; @@ -147,6 +150,7 @@ class Configuration : public ObjectImpl static String PkgDataDir; static String PrefixDir; static String ProgramData; + static double ReloadTimeout; static int RLimitFiles; static int RLimitProcesses; static int RLimitStack; diff --git a/lib/base/configuration.ti b/lib/base/configuration.ti index 22f121411a2..356c7fa6b20 100644 --- a/lib/base/configuration.ti +++ b/lib/base/configuration.ti @@ -111,6 +111,11 @@ abstract class Configuration set; }; + [config, no_storage, virtual] double ReloadTimeout { + get; + set; + }; + [config, no_storage, virtual] int RLimitFiles { get; set; diff --git a/lib/checker/checkercomponent.cpp b/lib/checker/checkercomponent.cpp index cde975b7251..31bf2b20f32 100644 --- a/lib/checker/checkercomponent.cpp +++ b/lib/checker/checkercomponent.cpp @@ -84,18 +84,41 @@ void CheckerComponent::Start(bool runtimeCreated) void CheckerComponent::Stop(bool runtimeRemoved) { - Log(LogInformation, "CheckerComponent") - << "'" << GetName() << "' stopped."; - { 
boost::mutex::scoped_lock lock(m_Mutex); m_Stopped = true; m_CV.notify_all(); } + double wait = 0.0; + + while (GetPendingCheckables() > 0) { + Log(LogDebug, "CheckerComponent") + << "Waiting for running checks (" << GetPendingCheckables() + << ") to finish. Waited for " << wait << " seconds now."; + + Utility::Sleep(0.1); + wait += 0.1; + + /* Pick a timeout slightly shorter than the process reload timeout. */ + double waitMax = Configuration::ReloadTimeout - 30; + if (waitMax <= 0) + waitMax = 1; + + if (wait > waitMax) { + Log(LogWarning, "CheckerComponent") + << "Checks running too long for " << wait + << " seconds, hard shutdown before reload timeout: " << Configuration::ReloadTimeout << "."; + break; + } + } + m_ResultTimer->Stop(); m_Thread.join(); + Log(LogInformation, "CheckerComponent") + << "'" << GetName() << "' stopped."; + ObjectImpl::Stop(runtimeRemoved); } diff --git a/lib/remote/configpackageutility.cpp b/lib/remote/configpackageutility.cpp index 3b15724d3ca..d103b413a9b 100644 --- a/lib/remote/configpackageutility.cpp +++ b/lib/remote/configpackageutility.cpp @@ -235,7 +235,7 @@ void ConfigPackageUtility::AsyncTryActivateStage(const String& packageName, cons args->Add("ActiveStageOverride=" + packageName + ":" + stageName); Process::Ptr process = new Process(Process::PrepareCommand(args)); - process->SetTimeout(300); + process->SetTimeout(Configuration::ReloadTimeout); process->Run(std::bind(&TryActivateStageCallback, _1, packageName, stageName, reload)); }