]> granicus.if.org Git - icinga2/commitdiff
Implement concurrent checks limit for remote checks
authorNoah Hilverling <noah.hilverling@icinga.com>
Tue, 16 Jan 2018 09:40:08 +0000 (10:40 +0100)
committerNoah Hilverling <noah.hilverling@icinga.com>
Mon, 29 Jan 2018 13:50:14 +0000 (14:50 +0100)
fixes #4841

lib/icinga/CMakeLists.txt
lib/icinga/checkable-check.cpp
lib/icinga/checkable.hpp
lib/icinga/clusterevents-check.cpp [new file with mode: 0644]
lib/icinga/clusterevents.cpp
lib/icinga/clusterevents.hpp

index 26d7d7b721b66b343d0bb2d8f885eb3334399b6b..6f9f14d7e73d95ab98398d9a456fe34bb264dacc 100644 (file)
@@ -49,7 +49,7 @@ set(icinga_SOURCES
   checkcommand.cpp checkcommand.hpp checkcommand-ti.hpp
   checkresult.cpp checkresult.hpp checkresult-ti.hpp
   cib.cpp cib.hpp
-  clusterevents.cpp clusterevents.hpp
+  clusterevents.cpp clusterevents.hpp clusterevents-check.cpp
   command.cpp command.hpp command-ti.hpp
   comment.cpp comment.hpp comment-ti.hpp
   compatutility.cpp compatutility.hpp
index 5d658d3b03e4688e0807dafc02be9933a6322116..8bf1fed74152e7c4c7536fe48cea7d4877f89f91 100644 (file)
@@ -42,6 +42,7 @@ boost::signals2::signal<void (const Checkable::Ptr&)> Checkable::OnNextCheckUpda
 
 boost::mutex Checkable::m_StatsMutex;
 int Checkable::m_PendingChecks = 0;
+boost::condition_variable Checkable::m_PendingChecksCV;
 
 CheckCommand::Ptr Checkable::GetCheckCommand() const
 {
@@ -544,6 +545,7 @@ void Checkable::DecreasePendingChecks()
 {
        boost::mutex::scoped_lock lock(m_StatsMutex);
        m_PendingChecks--;
+       m_PendingChecksCV.notify_one();
 }
 
 int Checkable::GetPendingChecks()
@@ -551,3 +553,12 @@ int Checkable::GetPendingChecks()
        boost::mutex::scoped_lock lock(m_StatsMutex);
        return m_PendingChecks;
 }
+
+void Checkable::AquirePendingCheckSlot(int maxPendingChecks)
+{
+       boost::mutex::scoped_lock lock(m_StatsMutex);
+       while (m_PendingChecks >= maxPendingChecks)
+               m_PendingChecksCV.wait(lock);
+
+       m_PendingChecks++;
+}
index 24582389da74834f5d28bff92fb4db673024d5e1..a71487e54131b5d06ec0dade5af9350acb177b35 100644 (file)
@@ -197,6 +197,7 @@ public:
        static void IncreasePendingChecks();
        static void DecreasePendingChecks();
        static int GetPendingChecks();
+       static void AquirePendingCheckSlot(int maxPendingChecks);
 
        static Object::Ptr GetPrototype();
 
@@ -211,6 +212,7 @@ private:
 
        static boost::mutex m_StatsMutex;
        static int m_PendingChecks;
+       static boost::condition_variable m_PendingChecksCV;
 
        /* Downtimes */
        std::set<Downtime::Ptr> m_Downtimes;
diff --git a/lib/icinga/clusterevents-check.cpp b/lib/icinga/clusterevents-check.cpp
new file mode 100644 (file)
index 0000000..41e2be2
--- /dev/null
@@ -0,0 +1,186 @@
+/******************************************************************************
+ * Icinga 2                                                                   *
+ * Copyright (C) 2012-2018 Icinga Development Team (https://www.icinga.com/)  *
+ *                                                                            *
+ * This program is free software; you can redistribute it and/or              *
+ * modify it under the terms of the GNU General Public License                *
+ * as published by the Free Software Foundation; either version 2             *
+ * of the License, or (at your option) any later version.                     *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU General Public License for more details.                               *
+ *                                                                            *
+ * You should have received a copy of the GNU General Public License          *
+ * along with this program; if not, write to the Free Software Foundation     *
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.             *
+ ******************************************************************************/
+
+#include "icinga/clusterevents.hpp"
+#include "remote/apilistener.hpp"
+#include "base/serializer.hpp"
+#include "base/exception.hpp"
+#include <thread>
+
+using namespace icinga;
+
+boost::mutex ClusterEvents::m_Mutex;
+std::deque<std::function<void ()>> ClusterEvents::m_CheckRequestQueue;
+bool ClusterEvents::m_CheckSchedulerRunning;
+
+void ClusterEvents::RemoteCheckThreadProc()
+{
+       Utility::SetThreadName("Remote Check Scheduler");
+
+       boost::mutex::scoped_lock lock(m_Mutex);
+
+       for(;;) {
+               if (m_CheckRequestQueue.empty())
+                       break;
+
+               lock.unlock();
+               Checkable::AquirePendingCheckSlot(Application::GetMaxConcurrentChecks());
+               lock.lock();
+
+               auto callback = m_CheckRequestQueue.front();
+               m_CheckRequestQueue.pop_front();
+               lock.unlock();
+
+               callback();
+               Checkable::DecreasePendingChecks();
+
+               lock.lock();
+       }
+
+       m_CheckSchedulerRunning = false;
+}
+
+void ClusterEvents::EnqueueCheck(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params)
+{
+       boost::mutex::scoped_lock lock(m_Mutex);
+
+       if (m_CheckRequestQueue.size() >= 25000) {
+               Log(LogCritical, "ClusterEvents", "Remote check queue ran out of slots. Discarding remote check request.");
+               return;
+       }
+
+       m_CheckRequestQueue.push_back(std::bind(ClusterEvents::ExecuteCheckFromQueue, origin, params));
+
+       if (!m_CheckSchedulerRunning) {
+               std::thread t(ClusterEvents::RemoteCheckThreadProc);
+               t.detach();
+               m_CheckSchedulerRunning = true;
+       }
+}
+
+void ClusterEvents::ExecuteCheckFromQueue(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params) {
+
+       Endpoint::Ptr sourceEndpoint = origin->FromClient->GetEndpoint();
+
+       if (!sourceEndpoint || (origin->FromZone && !Zone::GetLocalZone()->IsChildOf(origin->FromZone))) {
+               Log(LogNotice, "ClusterEvents")
+                               << "Discarding 'execute command' message from '" << origin->FromClient->GetIdentity() << "': Invalid endpoint origin (client not allowed).";
+               return;
+       }
+
+       ApiListener::Ptr listener = ApiListener::GetInstance();
+
+       if (!listener) {
+               Log(LogCritical, "ApiListener", "No instance available.");
+               return;
+       }
+
+       if (!listener->GetAcceptCommands()) {
+               Log(LogWarning, "ApiListener")
+                               << "Ignoring command. '" << listener->GetName() << "' does not accept commands.";
+
+               Host::Ptr host = new Host();
+               Dictionary::Ptr attrs = new Dictionary();
+
+               attrs->Set("__name", params->Get("host"));
+               attrs->Set("type", "Host");
+               attrs->Set("enable_active_checks", false);
+
+               Deserialize(host, attrs, false, FAConfig);
+
+               if (params->Contains("service"))
+                       host->SetExtension("agent_service_name", params->Get("service"));
+
+               CheckResult::Ptr cr = new CheckResult();
+               cr->SetState(ServiceUnknown);
+               cr->SetOutput("Endpoint '" + Endpoint::GetLocalEndpoint()->GetName() + "' does not accept commands.");
+               Dictionary::Ptr message = MakeCheckResultMessage(host, cr);
+               listener->SyncSendMessage(sourceEndpoint, message);
+
+               return;
+       }
+
+       /* use a virtual host object for executing the command */
+       Host::Ptr host = new Host();
+       Dictionary::Ptr attrs = new Dictionary();
+
+       attrs->Set("__name", params->Get("host"));
+       attrs->Set("type", "Host");
+
+       Deserialize(host, attrs, false, FAConfig);
+
+       if (params->Contains("service"))
+               host->SetExtension("agent_service_name", params->Get("service"));
+
+       String command = params->Get("command");
+       String command_type = params->Get("command_type");
+
+       if (command_type == "check_command") {
+               if (!CheckCommand::GetByName(command)) {
+                       CheckResult::Ptr cr = new CheckResult();
+                       cr->SetState(ServiceUnknown);
+                       cr->SetOutput("Check command '" + command + "' does not exist.");
+                       Dictionary::Ptr message = MakeCheckResultMessage(host, cr);
+                       listener->SyncSendMessage(sourceEndpoint, message);
+                       return;
+               }
+       } else if (command_type == "event_command") {
+               if (!EventCommand::GetByName(command)) {
+                       Log(LogWarning, "ClusterEvents")
+                                       << "Event command '" << command << "' does not exist.";
+                       return;
+               }
+       } else
+               return;
+
+       attrs->Set(command_type, params->Get("command"));
+       attrs->Set("command_endpoint", sourceEndpoint->GetName());
+
+       Deserialize(host, attrs, false, FAConfig);
+
+       host->SetExtension("agent_check", true);
+
+       Dictionary::Ptr macros = params->Get("macros");
+
+       if (command_type == "check_command") {
+               try {
+                       host->ExecuteRemoteCheck(macros);
+               } catch (const std::exception& ex) {
+                       CheckResult::Ptr cr = new CheckResult();
+                       cr->SetState(ServiceUnknown);
+
+                       String output = "Exception occured while checking '" + host->GetName() + "': " + DiagnosticInformation(ex);
+                       cr->SetOutput(output);
+
+                       double now = Utility::GetTime();
+                       cr->SetScheduleStart(now);
+                       cr->SetScheduleEnd(now);
+                       cr->SetExecutionStart(now);
+                       cr->SetExecutionEnd(now);
+
+                       Dictionary::Ptr message = MakeCheckResultMessage(host, cr);
+                       listener->SyncSendMessage(sourceEndpoint, message);
+
+                       Log(LogCritical, "checker", output);
+               }
+       } else if (command_type == "event_command") {
+               host->ExecuteEventHandler(macros, true);
+       }
+}
+
index 013bae284ffd7286d68a1b936548b5ab5aef8c2b..9ae894db1bcd276c7879edc4e98e606deb255c16 100644 (file)
@@ -578,112 +578,7 @@ Value ClusterEvents::AcknowledgementClearedAPIHandler(const MessageOrigin::Ptr&
 
 Value ClusterEvents::ExecuteCommandAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params)
 {
-       Endpoint::Ptr sourceEndpoint = origin->FromClient->GetEndpoint();
-
-       if (!sourceEndpoint || (origin->FromZone && !Zone::GetLocalZone()->IsChildOf(origin->FromZone))) {
-               Log(LogNotice, "ClusterEvents")
-                       << "Discarding 'execute command' message from '" << origin->FromClient->GetIdentity() << "': Invalid endpoint origin (client not allowed).";
-               return Empty;
-       }
-
-       ApiListener::Ptr listener = ApiListener::GetInstance();
-
-       if (!listener) {
-               Log(LogCritical, "ApiListener", "No instance available.");
-               return Empty;
-       }
-
-       if (!listener->GetAcceptCommands()) {
-               Log(LogWarning, "ApiListener")
-                       << "Ignoring command. '" << listener->GetName() << "' does not accept commands.";
-
-               Host::Ptr host = new Host();
-               Dictionary::Ptr attrs = new Dictionary();
-
-               attrs->Set("__name", params->Get("host"));
-               attrs->Set("type", "Host");
-               attrs->Set("enable_active_checks", false);
-
-               Deserialize(host, attrs, false, FAConfig);
-
-               if (params->Contains("service"))
-                       host->SetExtension("agent_service_name", params->Get("service"));
-
-               CheckResult::Ptr cr = new CheckResult();
-               cr->SetState(ServiceUnknown);
-               cr->SetOutput("Endpoint '" + Endpoint::GetLocalEndpoint()->GetName() + "' does not accept commands.");
-               Dictionary::Ptr message = MakeCheckResultMessage(host, cr);
-               listener->SyncSendMessage(sourceEndpoint, message);
-
-               return Empty;
-       }
-
-       /* use a virtual host object for executing the command */
-       Host::Ptr host = new Host();
-       Dictionary::Ptr attrs = new Dictionary();
-
-       attrs->Set("__name", params->Get("host"));
-       attrs->Set("type", "Host");
-
-       Deserialize(host, attrs, false, FAConfig);
-
-       if (params->Contains("service"))
-               host->SetExtension("agent_service_name", params->Get("service"));
-
-       String command = params->Get("command");
-       String command_type = params->Get("command_type");
-
-       if (command_type == "check_command") {
-               if (!CheckCommand::GetByName(command)) {
-                       CheckResult::Ptr cr = new CheckResult();
-                       cr->SetState(ServiceUnknown);
-                       cr->SetOutput("Check command '" + command + "' does not exist.");
-                       Dictionary::Ptr message = MakeCheckResultMessage(host, cr);
-                       listener->SyncSendMessage(sourceEndpoint, message);
-                       return Empty;
-               }
-       } else if (command_type == "event_command") {
-               if (!EventCommand::GetByName(command)) {
-                       Log(LogWarning, "ClusterEvents")
-                               << "Event command '" << command << "' does not exist.";
-                       return Empty;
-               }
-       } else
-               return Empty;
-
-       attrs->Set(command_type, params->Get("command"));
-       attrs->Set("command_endpoint", sourceEndpoint->GetName());
-
-       Deserialize(host, attrs, false, FAConfig);
-
-       host->SetExtension("agent_check", true);
-
-       Dictionary::Ptr macros = params->Get("macros");
-
-       if (command_type == "check_command") {
-               try {
-                       host->ExecuteRemoteCheck(macros);
-               } catch (const std::exception& ex) {
-                       CheckResult::Ptr cr = new CheckResult();
-                       cr->SetState(ServiceUnknown);
-
-                       String output = "Exception occured while checking '" + host->GetName() + "': " + DiagnosticInformation(ex);
-                       cr->SetOutput(output);
-
-                       double now = Utility::GetTime();
-                       cr->SetScheduleStart(now);
-                       cr->SetScheduleEnd(now);
-                       cr->SetExecutionStart(now);
-                       cr->SetExecutionEnd(now);
-
-                       Dictionary::Ptr message = MakeCheckResultMessage(host, cr);
-                       listener->SyncSendMessage(sourceEndpoint, message);
-
-                       Log(LogCritical, "checker", output);
-               }
-       } else if (command_type == "event_command") {
-               host->ExecuteEventHandler(macros, true);
-       }
+       EnqueueCheck(origin, params);
 
        return Empty;
 }
index 2f474a0d0d511acc5a35f7d9f87c9e4cf58300d4..d712b7fb808b87611211716d3dba7d032cb2408e 100644 (file)
@@ -74,6 +74,15 @@ public:
        static void NotificationSentToAllUsersHandler(const Notification::Ptr& notification, const Checkable::Ptr& checkable, const std::set<User::Ptr>& users,
                NotificationType notificationType, const CheckResult::Ptr& cr, const String& author, const String& commentText, const MessageOrigin::Ptr& origin);
        static Value NotificationSentToAllUsersAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params);
+
+private:
+       static boost::mutex m_Mutex;
+       static std::deque<std::function<void ()>> m_CheckRequestQueue;
+       static bool m_CheckSchedulerRunning;
+
+       static void RemoteCheckThreadProc();
+       static void EnqueueCheck(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params);
+       static void ExecuteCheckFromQueue(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params);
 };
 
 }