1 /******************************************************************************
3 * Copyright (C) 2012-2014 Icinga Development Team (http://www.icinga.org) *
5 * This program is free software; you can redistribute it and/or *
6 * modify it under the terms of the GNU General Public License *
7 * as published by the Free Software Foundation; either version 2 *
8 * of the License, or (at your option) any later version. *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the Free Software Foundation *
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. *
18 ******************************************************************************/
20 #include "icinga/checkable.hpp"
21 #include "icinga/service.hpp"
22 #include "icinga/host.hpp"
23 #include "icinga/checkcommand.hpp"
24 #include "icinga/icingaapplication.hpp"
25 #include "icinga/cib.hpp"
26 #include "icinga/apievents.hpp"
27 #include "remote/messageorigin.hpp"
28 #include "remote/apilistener.hpp"
29 #include "base/objectlock.hpp"
30 #include "base/logger.hpp"
31 #include "base/convert.hpp"
32 #include "base/utility.hpp"
33 #include "base/context.hpp"
34 #include <boost/foreach.hpp>
36 using namespace icinga;
/*
 * Definitions of the static signals declared on Checkable. The template
 * argument of each boost::signals2::signal is the slot signature; every
 * signal receives the affected checkable as its first argument, and most of
 * them also carry the MessageOrigin of the change as their last argument.
 */
38 boost::signals2::signal<void (const Checkable::Ptr&, const CheckResult::Ptr&, const MessageOrigin&)> Checkable::OnNewCheckResult;
39 boost::signals2::signal<void (const Checkable::Ptr&, const CheckResult::Ptr&, StateType, const MessageOrigin&)> Checkable::OnStateChange;
40 boost::signals2::signal<void (const Checkable::Ptr&, const CheckResult::Ptr&, std::set<Checkable::Ptr>, const MessageOrigin&)> Checkable::OnReachabilityChanged;
41 boost::signals2::signal<void (const Checkable::Ptr&, NotificationType, const CheckResult::Ptr&, const String&, const String&)> Checkable::OnNotificationsRequested;
42 boost::signals2::signal<void (const Checkable::Ptr&, double, const MessageOrigin&)> Checkable::OnNextCheckChanged;
43 boost::signals2::signal<void (const Checkable::Ptr&, bool, const MessageOrigin&)> Checkable::OnForceNextCheckChanged;
44 boost::signals2::signal<void (const Checkable::Ptr&, bool, const MessageOrigin&)> Checkable::OnForceNextNotificationChanged;
45 boost::signals2::signal<void (const Checkable::Ptr&, bool, const MessageOrigin&)> Checkable::OnEnableActiveChecksChanged;
46 boost::signals2::signal<void (const Checkable::Ptr&, bool, const MessageOrigin&)> Checkable::OnEnablePassiveChecksChanged;
47 boost::signals2::signal<void (const Checkable::Ptr&, bool, const MessageOrigin&)> Checkable::OnEnableNotificationsChanged;
48 boost::signals2::signal<void (const Checkable::Ptr&, bool, const MessageOrigin&)> Checkable::OnEnableFlappingChanged;
49 boost::signals2::signal<void (const Checkable::Ptr&, double, const MessageOrigin&)> Checkable::OnCheckIntervalChanged;
50 boost::signals2::signal<void (const Checkable::Ptr&, double, const MessageOrigin&)> Checkable::OnRetryIntervalChanged;
51 boost::signals2::signal<void (const Checkable::Ptr&, const CheckCommand::Ptr&, const MessageOrigin&)> Checkable::OnCheckCommandChanged;
52 boost::signals2::signal<void (const Checkable::Ptr&, int, const MessageOrigin&)> Checkable::OnMaxCheckAttemptsChanged;
53 boost::signals2::signal<void (const Checkable::Ptr&, const TimePeriod::Ptr&, const MessageOrigin&)> Checkable::OnCheckPeriodChanged;
54 boost::signals2::signal<void (const Checkable::Ptr&, FlappingState)> Checkable::OnFlappingChanged;
/**
 * Looks up the check command object for this checkable.
 *
 * The command name is taken from GetOverrideCheckCommand() when that value is
 * non-empty; GetCheckCommandRaw() supplies the name otherwise.
 * NOTE(review): the declaration of `command` and the `else` separating the
 * two assignments are not visible in this extract - confirm against the
 * full file.
 *
 * @returns The result of CheckCommand::GetByName() for the selected name.
 */
56 CheckCommand::Ptr Checkable::GetCheckCommand(void) const
60 if (!GetOverrideCheckCommand().IsEmpty())
61 command = GetOverrideCheckCommand();
63 command = GetCheckCommandRaw();
65 return CheckCommand::GetByName(command);
/**
 * Overrides the check command of this checkable.
 *
 * Stores the name of the given command as the override check command and then
 * fires OnCheckCommandChanged.
 *
 * @param command The new check command. It is dereferenced unconditionally,
 *                so it must not be a null pointer.
 * @param origin Origin of the change; forwarded unchanged to the signal.
 */
68 void Checkable::SetCheckCommand(const CheckCommand::Ptr& command, const MessageOrigin& origin)
70 SetOverrideCheckCommand(command->GetName());
72 OnCheckCommandChanged(this, command, origin);
/**
 * Looks up the time period during which this checkable is checked.
 *
 * The period name is taken from GetOverrideCheckPeriod() when that value is
 * non-empty; GetCheckPeriodRaw() supplies the name otherwise.
 * NOTE(review): the declaration of `tp` and the `else` separating the two
 * assignments are not visible in this extract - confirm against the full file.
 *
 * @returns The result of TimePeriod::GetByName() for the selected name.
 */
75 TimePeriod::Ptr Checkable::GetCheckPeriod(void) const
79 if (!GetOverrideCheckPeriod().IsEmpty())
80 tp = GetOverrideCheckPeriod();
82 tp = GetCheckPeriodRaw();
84 return TimePeriod::GetByName(tp);
/**
 * Overrides the check period of this checkable.
 *
 * Stores the name of the given time period as the override check period and
 * then fires OnCheckPeriodChanged.
 *
 * @param tp The new check period. It is dereferenced unconditionally, so it
 *           must not be a null pointer.
 * @param origin Origin of the change; forwarded unchanged to the signal.
 */
87 void Checkable::SetCheckPeriod(const TimePeriod::Ptr& tp, const MessageOrigin& origin)
89 SetOverrideCheckPeriod(tp->GetName());
91 OnCheckPeriodChanged(this, tp, origin);
/**
 * Returns the effective check interval.
 *
 * @returns The override check interval when one is set (non-empty),
 *          otherwise the value of GetCheckIntervalRaw().
 */
94 double Checkable::GetCheckInterval(void) const
96 if (!GetOverrideCheckInterval().IsEmpty())
97 return GetOverrideCheckInterval();
99 return GetCheckIntervalRaw();
/**
 * Overrides the check interval and fires OnCheckIntervalChanged.
 *
 * @param interval The new check interval; stored as the override value.
 * @param origin Origin of the change; forwarded unchanged to the signal.
 */
102 void Checkable::SetCheckInterval(double interval, const MessageOrigin& origin)
104 SetOverrideCheckInterval(interval);
106 OnCheckIntervalChanged(this, interval, origin);
/**
 * Returns the effective retry interval.
 *
 * @returns The override retry interval when one is set (non-empty),
 *          otherwise the value of GetRetryIntervalRaw().
 */
109 double Checkable::GetRetryInterval(void) const
111 if (!GetOverrideRetryInterval().IsEmpty())
112 return GetOverrideRetryInterval();
114 return GetRetryIntervalRaw();
/**
 * Overrides the retry interval and fires OnRetryIntervalChanged.
 *
 * @param interval The new retry interval; stored as the override value.
 * @param origin Origin of the change; forwarded unchanged to the signal.
 */
117 void Checkable::SetRetryInterval(double interval, const MessageOrigin& origin)
119 SetOverrideRetryInterval(interval);
121 OnRetryIntervalChanged(this, interval, origin);
/**
 * Stores the scheduling offset in m_SchedulingOffset. The value is read back
 * by GetSchedulingOffset(), which UpdateNextCheck() uses when aligning the
 * next check time.
 *
 * @param offset The new scheduling offset.
 */
124 void Checkable::SetSchedulingOffset(long offset)
126 m_SchedulingOffset = offset;
/**
 * Returns the scheduling offset stored in m_SchedulingOffset.
 *
 * @returns The value last passed to SetSchedulingOffset().
 */
129 long Checkable::GetSchedulingOffset(void)
131 return m_SchedulingOffset;
/**
 * Sets the time of the next check and fires OnNextCheckChanged.
 *
 * @param nextCheck The new next-check timestamp; stored via SetNextCheckRaw().
 * @param origin Origin of the change; forwarded unchanged to the signal.
 */
134 void Checkable::SetNextCheck(double nextCheck, const MessageOrigin& origin)
136 SetNextCheckRaw(nextCheck);
138 OnNextCheckChanged(this, nextCheck, origin);
/**
 * Returns the time of the next check.
 *
 * @returns The value of GetNextCheckRaw().
 */
141 double Checkable::GetNextCheck(void)
143 return GetNextCheckRaw();
/**
 * Computes the time of the next check and stores it via SetNextCheck().
 *
 * The retry interval is selected when the checkable is in a soft state and
 * already has a check result; the regular check interval is used otherwise.
 * The next check is then set to `now - adj + interval`.
 *
 * NOTE(review): the declarations of `interval` and `adj`, the `else` before
 * the second assignment and any condition guarding the fmod() line are not
 * visible in this extract - confirm against the full file. Without a guard,
 * an interval of 0 would make fmod() return NaN.
 */
146 void Checkable::UpdateNextCheck(void)
150 if (GetStateType() == StateTypeSoft && GetLastCheckResult() != NULL)
151 interval = GetRetryInterval();
153 interval = GetCheckInterval();
155 double now = Utility::GetTime();
/* Remainder of (now + offset / 100) modulo the interval, computed on values
 * scaled by 100; subtracting it below aligns the next check to that grid. */
159 adj = fmod(now * 100 + GetSchedulingOffset(), interval * 100) / 100.0;
161 SetNextCheck(now - adj + interval);
/**
 * Tells whether a check result has been stored for this checkable.
 *
 * @returns true when GetLastCheckResult() is not NULL, false otherwise.
 */
164 bool Checkable::HasBeenChecked(void) const
166 return GetLastCheckResult() != NULL;
/**
 * Reports the schedule-end time of the last check result.
 *
 * `schedule_end` starts out as -1 and is replaced by cr->GetScheduleEnd().
 * NOTE(review): the null check on `cr` and the return statement are not
 * visible in this extract; presumably the function returns `schedule_end`,
 * i.e. -1 when there is no check result yet - confirm against the full file.
 */
169 double Checkable::GetLastCheck(void) const
171 CheckResult::Ptr cr = GetLastCheckResult();
172 double schedule_end = -1;
175 schedule_end = cr->GetScheduleEnd();
/**
 * Tells whether active checks are enabled.
 *
 * @returns The override value when one is set (non-empty), otherwise the
 *          value of GetEnableActiveChecksRaw().
 */
180 bool Checkable::GetEnableActiveChecks(void) const
182 if (!GetOverrideEnableActiveChecks().IsEmpty())
183 return GetOverrideEnableActiveChecks();
185 return GetEnableActiveChecksRaw();
/**
 * Overrides the active-checks flag and fires OnEnableActiveChecksChanged.
 *
 * @param enabled The new flag value; stored as the override value.
 * @param origin Origin of the change; forwarded unchanged to the signal.
 */
188 void Checkable::SetEnableActiveChecks(bool enabled, const MessageOrigin& origin)
190 SetOverrideEnableActiveChecks(enabled);
192 OnEnableActiveChecksChanged(this, enabled, origin);
/**
 * Tells whether passive checks are enabled.
 *
 * @returns The override value when one is set (non-empty), otherwise the
 *          value of GetEnablePassiveChecksRaw().
 */
195 bool Checkable::GetEnablePassiveChecks(void) const
197 if (!GetOverrideEnablePassiveChecks().IsEmpty())
198 return GetOverrideEnablePassiveChecks();
200 return GetEnablePassiveChecksRaw();
/**
 * Overrides the passive-checks flag and fires OnEnablePassiveChecksChanged.
 *
 * @param enabled The new flag value; stored as the override value.
 * @param origin Origin of the change; forwarded unchanged to the signal.
 */
203 void Checkable::SetEnablePassiveChecks(bool enabled, const MessageOrigin& origin)
205 SetOverrideEnablePassiveChecks(enabled);
207 OnEnablePassiveChecksChanged(this, enabled, origin);
/**
 * Tells whether the next check is flagged as forced.
 *
 * @returns The value of GetForceNextCheckRaw().
 */
210 bool Checkable::GetForceNextCheck(void) const
212 return GetForceNextCheckRaw();
/**
 * Sets the force-next-check flag and fires OnForceNextCheckChanged.
 *
 * @param forced The new flag value; stored via SetForceNextCheckRaw().
 * @param origin Origin of the change; forwarded unchanged to the signal.
 */
215 void Checkable::SetForceNextCheck(bool forced, const MessageOrigin& origin)
217 SetForceNextCheckRaw(forced);
219 OnForceNextCheckChanged(this, forced, origin);
/**
 * Returns the effective maximum number of check attempts.
 *
 * @returns The override value when one is set (non-empty), otherwise the
 *          value of GetMaxCheckAttemptsRaw().
 */
222 int Checkable::GetMaxCheckAttempts(void) const
224 if (!GetOverrideMaxCheckAttempts().IsEmpty())
225 return GetOverrideMaxCheckAttempts();
227 return GetMaxCheckAttemptsRaw();
/**
 * Overrides the maximum number of check attempts and fires
 * OnMaxCheckAttemptsChanged.
 *
 * @param attempts The new maximum; stored as the override value.
 * @param origin Origin of the change; forwarded unchanged to the signal.
 */
230 void Checkable::SetMaxCheckAttempts(int attempts, const MessageOrigin& origin)
232 SetOverrideMaxCheckAttempts(attempts);
234 OnMaxCheckAttemptsChanged(this, attempts, origin);
/**
 * Processes a check result for this checkable and updates its state.
 *
 * Visible steps, in order:
 *  - clear m_CheckRunning and fill in any timestamps of `cr` that are still 0;
 *  - forward the result to the command endpoint when one is configured, it is
 *    not the local endpoint and the "agent_check" extension is set;
 *  - remember the previous state, derive the new state type and check attempt,
 *    update the per-state timestamps and signal reachability changes;
 *  - handle acknowledgements, parent rescheduling, hard-state bookkeeping,
 *    statistics, downtime and notification decisions;
 *  - attach vars_before/vars_after to `cr`, store it as the last check result,
 *    update flapping status and emit the signals, log lines, event handler and
 *    notification requests.
 *
 * NOTE(review): a number of short lines (conditions, early returns, braces,
 * lock/unlock calls) are not visible in this extract, so the exact scope of
 * several statements below cannot be determined here - confirm against the
 * full file before relying on the inline notes.
 *
 * @param cr The check result. It is dereferenced unconditionally and modified
 *           in place (timestamps, check source, vars_before/vars_after).
 * @param origin Origin of the result; used for IsLocal() and forwarded to the
 *               signals.
 */
237 void Checkable::ProcessCheckResult(const CheckResult::Ptr& cr, const MessageOrigin& origin)
/* The running flag is cleared while holding the object lock. */
240 ObjectLock olock(this);
241 m_CheckRunning = false;
/* Timestamps that were left at 0 default to the current time. */
244 double now = Utility::GetTime();
246 if (cr->GetScheduleStart() == 0)
247 cr->SetScheduleStart(now);
249 if (cr->GetScheduleEnd() == 0)
250 cr->SetScheduleEnd(now);
252 if (cr->GetExecutionStart() == 0)
253 cr->SetExecutionStart(now);
255 if (cr->GetExecutionEnd() == 0)
256 cr->SetExecutionEnd(now);
/* Locally produced results are tagged with this node's name. */
258 if (origin.IsLocal())
259 cr->SetCheckSource(IcingaApplication::GetInstance()->GetNodeName());
261 Endpoint::Ptr command_endpoint = GetCommandEndpoint();
/* Results for a remote command endpoint are sent to that endpoint.
 * NOTE(review): the null check on `listener` and the end of this branch are
 * not visible here. */
263 if (command_endpoint && (Endpoint::GetLocalEndpoint() != command_endpoint) && GetExtension("agent_check")) {
264 ApiListener::Ptr listener = ApiListener::GetInstance();
267 Dictionary::Ptr message = ApiEvents::MakeCheckResultMessage(this, cr);
268 listener->SyncSendMessage(command_endpoint, message);
/* Reachability is evaluated before the object lock is taken below. */
274 bool reachable = IsReachable();
275 bool notification_reachable = IsReachable(DependencyNotification);
278 ObjectLock olock(this);
/* Snapshot of the state before this result is applied. */
280 CheckResult::Ptr old_cr = GetLastCheckResult();
281 ServiceState old_state = GetStateRaw();
282 StateType old_stateType = GetStateType();
283 long old_attempt = GetCheckAttempt();
284 bool recovery = false;
/* Detects a result whose execution started before that of the stored result.
 * NOTE(review): the statement controlled by this condition is not visible. */
286 if (old_cr && cr->GetExecutionStart() < old_cr->GetExecutionStart())
289 /* The ExecuteCheck function already sets the old state, but we need to do it again
290 * in case this was a passive check result. */
291 SetLastStateRaw(old_state);
292 SetLastStateType(old_stateType);
293 SetLastReachable(reachable);
297 std::set<Checkable::Ptr> children = GetChildren();
/* State type / attempt selection. NOTE(review): the condition of the first
 * branch and the declaration of `attempt` are not visible here. */
300 SetStateType(StateTypeHard);
301 } else if (cr->GetState() == ServiceOK) {
302 if (old_state == ServiceOK && old_stateType == StateTypeSoft) {
303 SetStateType(StateTypeHard); // SOFT OK -> HARD OK
307 if (old_state != ServiceOK)
308 recovery = true; // NOT OK -> SOFT/HARD OK
310 ResetNotificationNumbers();
311 SetLastStateOK(Utility::GetTime());
313 /* update reachability for child objects in OK state */
314 if (!children.empty())
315 OnReachabilityChanged(this, cr, children, origin);
/* Non-OK result: become hard once the previous attempt count has reached the
 * maximum, otherwise stay or become soft and count the attempt. */
317 if (old_attempt >= GetMaxCheckAttempts()) {
318 SetStateType(StateTypeHard);
319 } else if (old_stateType == StateTypeSoft || old_state == ServiceOK) {
320 SetStateType(StateTypeSoft);
321 attempt = old_attempt + 1;
323 attempt = old_attempt;
/* Record the time at which the reported state was last seen. */
326 switch (cr->GetState()) {
328 /* Nothing to do here. */
331 SetLastStateWarning(Utility::GetTime());
333 case ServiceCritical:
334 SetLastStateCritical(Utility::GetTime());
337 SetLastStateUnknown(Utility::GetTime());
341 /* update reachability for child objects in NOT-OK state */
342 if (!children.empty())
343 OnReachabilityChanged(this, cr, children, origin);
/* NOTE(review): the condition guarding this call is not visible here. */
347 SetLastStateUnreachable(Utility::GetTime());
349 SetCheckAttempt(attempt);
351 ServiceState new_state = cr->GetState();
352 SetStateRaw(new_state);
/* NOTE(review): the statements down to the parent loop presumably run only
 * when stateChange is true; the enclosing `if` is not visible here. */
354 bool stateChange = (old_state != new_state);
356 SetLastStateChange(now);
358 /* remove acknowledgements */
359 if (GetAcknowledgement() == AcknowledgementNormal ||
360 (GetAcknowledgement() == AcknowledgementSticky && new_state == ServiceOK)) {
361 ClearAcknowledgement();
364 /* reschedule direct parents */
365 BOOST_FOREACH(const Checkable::Ptr& parent, GetParents()) {
/* Self-reference check. NOTE(review): the controlled statement is not
 * visible; presumably it skips this iteration. */
366 if (parent.get() == this)
369 ObjectLock olock(parent);
370 parent->SetNextCheck(Utility::GetTime());
/* Acknowledgement comments are removed later, once no acknowledgement is set. */
374 bool remove_acknowledgement_comments = false;
376 if (GetAcknowledgement() == AcknowledgementNone)
377 remove_acknowledgement_comments = true;
/* A soft -> hard transition counts as a hard change. NOTE(review): the
 * statements between the next condition and SetLastHardStateRaw() are not
 * visible here. */
379 bool hardChange = (GetStateType() == StateTypeHard && old_stateType == StateTypeSoft);
381 if (stateChange && old_stateType == StateTypeHard && GetStateType() == StateTypeHard)
388 SetLastHardStateRaw(new_state);
389 SetLastHardStateChange(now);
/* NOTE(review): the statement controlled by this condition is not visible. */
392 if (new_state != ServiceOK)
396 Service::Ptr service;
397 tie(host, service) = GetHostService(this);
/* Defaults to host; NOTE(review): the condition switching to service is not
 * visible here (presumably a check on `service`). */
399 CheckableType checkable_type = CheckableHost;
401 checkable_type = CheckableService;
403 /* statistics for external tools */
404 Checkable::UpdateStatistics(cr, checkable_type);
/* Problem/recovery notifications require a hard change, notification
 * reachability, no downtime and no acknowledgement. */
406 bool in_downtime = IsInDowntime();
407 bool send_notification = hardChange && notification_reachable && !in_downtime && !IsAcknowledged();
410 send_notification = false; /* Don't send notifications for the initial state change */
412 if (old_state == ServiceOK && old_stateType == StateTypeSoft)
413 send_notification = false; /* Don't send notifications for SOFT-OK -> HARD-OK. */
/* A downtime notification is due whenever the downtime status flipped. */
415 bool send_downtime_notification = (GetLastInDowntime() != in_downtime);
416 SetLastInDowntime(in_downtime);
420 if (remove_acknowledgement_comments)
421 RemoveCommentsByType(CommentAcknowledgement);
/* Snapshot of the resulting state, attached to the check result. */
423 Dictionary::Ptr vars_after = new Dictionary();
424 vars_after->Set("state", new_state);
425 vars_after->Set("state_type", GetStateType());
426 vars_after->Set("attempt", GetCheckAttempt());
427 vars_after->Set("reachable", reachable);
/* NOTE(review): `old_cr` is dereferenced here; the null check that presumably
 * guards this line is not visible. */
430 cr->SetVarsBefore(old_cr->GetVarsAfter());
432 cr->SetVarsAfter(vars_after);
435 SetLastCheckResult(cr);
/* Flapping status is only updated for hard states; the before/after values
 * decide on the flapping notifications at the end of the function. */
437 bool was_flapping, is_flapping;
439 was_flapping = IsFlapping();
440 if (GetStateType() == StateTypeHard)
441 UpdateFlappingStatus(stateChange);
442 is_flapping = IsFlapping();
446 // Log(LogDebug, "Checkable")
447 // << "Flapping: Checkable " << GetName()
448 // << " was: " << (was_flapping)
449 // << " is: " << is_flapping)
450 // << " threshold: " << GetFlappingThreshold()
451 // << "% current: " + GetFlappingCurrent()) << "%.";
453 OnNewCheckResult(this, cr, origin);
455 /* signal status updates to for example db_ido */
456 OnStateChanged(this);
/* Hosts map the raw service state through Host::CalculateState() for display. */
458 String old_state_str = (service ? Service::StateToString(old_state) : Host::StateToString(Host::CalculateState(old_state)));
459 String new_state_str = (service ? Service::StateToString(new_state) : Host::StateToString(Host::CalculateState(new_state)));
/* NOTE(review): the condition of the first branch is not visible; presumably
 * it tests hardChange. */
462 OnStateChange(this, cr, StateTypeHard, origin);
463 Log(LogNotice, "Checkable")
464 << "State Change: Checkable " << GetName() << " hard state change from " << old_state_str << " to " << new_state_str << " detected.";
465 } else if (stateChange) {
466 OnStateChange(this, cr, StateTypeSoft, origin);
467 Log(LogNotice, "Checkable")
468 << "State Change: Checkable " << GetName() << " soft state change from " << old_state_str << " to " << new_state_str << " detected.";
471 if (GetStateType() == StateTypeSoft || hardChange || recovery)
472 ExecuteEventHandler();
474 if (send_downtime_notification)
475 OnNotificationsRequested(this, in_downtime ? NotificationDowntimeStart : NotificationDowntimeEnd, cr, "", "");
/* Flapping start/stop notifications take precedence over the regular
 * problem/recovery notification. */
477 if (!was_flapping && is_flapping) {
478 OnNotificationsRequested(this, NotificationFlappingStart, cr, "", "");
480 Log(LogNotice, "Checkable")
481 << "Flapping: Checkable " << GetName() << " started flapping (" << GetFlappingThreshold() << "% < " << GetFlappingCurrent() << "%).";
482 OnFlappingChanged(this, FlappingStarted);
483 } else if (was_flapping && !is_flapping) {
484 OnNotificationsRequested(this, NotificationFlappingEnd, cr, "", "");
486 Log(LogNotice, "Checkable")
487 << "Flapping: Checkable " << GetName() << " stopped flapping (" << GetFlappingThreshold() << "% >= " << GetFlappingCurrent() << "%).";
488 OnFlappingChanged(this, FlappingStopped);
489 } else if (send_notification)
490 OnNotificationsRequested(this, recovery ? NotificationRecovery : NotificationProblem, cr, "", "");
/**
 * Tells whether a check is currently flagged as running.
 *
 * @returns The value of m_CheckRunning, read while holding the object lock.
 */
493 bool Checkable::IsCheckPending(void) const
495 ObjectLock olock(this);
496 return m_CheckRunning;
/**
 * Executes the check command for this checkable.
 *
 * Marks the check as running, records the current state as the last state,
 * prepares a CheckResult with the scheduled and actual start times and runs
 * the check command. When a command endpoint is configured and
 * `useResolvedMacros` is false, the command is instead handed to that
 * endpoint as an "event::ExecuteCommand" message, or - if the endpoint is not
 * connected - an UNKNOWN result is processed locally.
 *
 * NOTE(review): several short lines (conditions, early returns, braces) are
 * not visible in this extract - confirm the exact control flow against the
 * full file.
 *
 * @param resolvedMacros Macros handed to the check command. NOTE(review):
 *                       presumably used in the `else` case below, i.e. unless
 *                       an endpoint is set and useResolvedMacros is false.
 * @param useResolvedMacros Forwarded to the check command's Execute(); when
 *                          false and an endpoint is set, a fresh Dictionary is
 *                          used for the macros and the endpoint branch runs.
 */
499 void Checkable::ExecuteCheck(const Dictionary::Ptr& resolvedMacros, bool useResolvedMacros)
501 CONTEXT("Executing check for object '" + GetName() + "'");
/* Reachability is evaluated before the object lock is taken. */
507 bool reachable = IsReachable();
510 ObjectLock olock(this);
512 /* don't run another check if there is one pending */
516 m_CheckRunning = true;
/* NOTE(review): SetLastStateType() is fed GetLastStateType(), i.e. its own
 * current value, whereas the line above copies GetStateRaw() and
 * ProcessCheckResult() stores the current GetStateType() as the last state
 * type. GetStateType() looks like the intended argument - confirm. */
518 SetLastStateRaw(GetStateRaw());
519 SetLastStateType(GetLastStateType());
520 SetLastReachable(reachable);
523 /* keep track of scheduling info in case the check type doesn't provide its own information */
524 double scheduled_start = GetNextCheck();
525 double before_check = Utility::GetTime();
527 CheckResult::Ptr result = new CheckResult();
529 result->SetScheduleStart(scheduled_start);
530 result->SetExecutionStart(before_check);
532 Dictionary::Ptr macros;
533 Endpoint::Ptr endpoint = GetCommandEndpoint();
/* A fresh dictionary is used when the command goes to an endpoint; the same
 * `macros` object is later sent as the "macros" parameter. */
535 if (endpoint && !useResolvedMacros)
536 macros = new Dictionary();
538 macros = resolvedMacros;
540 GetCheckCommand()->Execute(this, result, macros, useResolvedMacros);
542 if (endpoint && !useResolvedMacros) {
543 if (endpoint->IsConnected()) {
/* Build the JSON-RPC request that asks the endpoint to run the command. */
544 Dictionary::Ptr message = new Dictionary();
545 message->Set("jsonrpc", "2.0");
546 message->Set("method", "event::ExecuteCommand");
549 Service::Ptr service;
550 tie(host, service) = GetHostService(this);
552 Dictionary::Ptr params = new Dictionary();
553 message->Set("params", params);
554 params->Set("command_type", "check_command");
555 params->Set("command", GetCheckCommand()->GetName());
556 params->Set("host", host->GetName());
/* NOTE(review): `service` is dereferenced here; the null check that
 * presumably guards this line is not visible. */
559 params->Set("service", service->GetShortName());
561 params->Set("macros", macros);
563 ApiListener::Ptr listener = ApiListener::GetInstance();
/* NOTE(review): the null check on `listener` is not visible here. */
566 listener->SyncSendMessage(endpoint, message);
/* Endpoint not connected: report UNKNOWN, but only once the application's
 * start time lies more than 30 time units (presumably seconds) in the past. */
567 } else if (Application::GetInstance()->GetStartTime() < Utility::GetTime() - 30) {
568 result->SetState(ServiceUnknown);
569 result->SetOutput("Remote Icinga instance '" + endpoint->GetName() + "' is not connected.");
570 ProcessCheckResult(result);
/* Clears the running flag under the object lock. NOTE(review): the scope
 * enclosing these two lines is not visible here. */
574 ObjectLock olock(this);
575 m_CheckRunning = false;
/**
 * Feeds a check result into the CIB check statistics.
 *
 * Depending on `type`, one of the host or service statistics counters is
 * updated with the result's schedule-end time and a count of 1. Any other
 * type is reported with a warning log message.
 *
 * NOTE(review): the conditions choosing between the "active" and "passive"
 * counters are not visible in this extract - confirm against the full file.
 *
 * @param cr The check result; dereferenced unconditionally.
 * @param type Whether the result belongs to a host or a service.
 */
580 void Checkable::UpdateStatistics(const CheckResult::Ptr& cr, CheckableType type)
/* NOTE(review): GetScheduleEnd() is stored in a double elsewhere in this file,
 * so this assignment presumably truncates to whole seconds - confirm. */
582 time_t ts = cr->GetScheduleEnd();
584 if (type == CheckableHost) {
586 CIB::UpdateActiveHostChecksStatistics(ts, 1);
588 CIB::UpdatePassiveHostChecksStatistics(ts, 1);
589 } else if (type == CheckableService) {
591 CIB::UpdateActiveServiceChecksStatistics(ts, 1);
593 CIB::UpdatePassiveServiceChecksStatistics(ts, 1);
595 Log(LogWarning, "Checkable", "Unknown checkable type for statistic update.");
/**
 * Computes how long a check took to execute.
 *
 * NOTE(review): the lines between the signature and the return statement are
 * not visible in this extract (possibly a guard for a missing result) -
 * confirm against the full file.
 *
 * @param cr The check result to evaluate.
 * @returns cr->GetExecutionEnd() minus cr->GetExecutionStart().
 */
599 double Checkable::CalculateExecutionTime(const CheckResult::Ptr& cr)
604 return cr->GetExecutionEnd() - cr->GetExecutionStart();
607 double Checkable::CalculateLatency(const CheckResult::Ptr& cr)
612 double latency = (cr->GetScheduleEnd() - cr->GetScheduleStart()) - CalculateExecutionTime(cr);