]> granicus.if.org Git - icinga2/blob - lib/icinga/checkable-check.cpp
60a39ef54c895589f54a8be22c8d488d91e61254
[icinga2] / lib / icinga / checkable-check.cpp
1 /******************************************************************************
2  * Icinga 2                                                                   *
3  * Copyright (C) 2012-2014 Icinga Development Team (http://www.icinga.org)    *
4  *                                                                            *
5  * This program is free software; you can redistribute it and/or              *
6  * modify it under the terms of the GNU General Public License                *
7  * as published by the Free Software Foundation; either version 2             *
8  * of the License, or (at your option) any later version.                     *
9  *                                                                            *
10  * This program is distributed in the hope that it will be useful,            *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
13  * GNU General Public License for more details.                               *
14  *                                                                            *
15  * You should have received a copy of the GNU General Public License          *
16  * along with this program; if not, write to the Free Software Foundation     *
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.             *
18  ******************************************************************************/
19
20 #include "icinga/checkable.hpp"
21 #include "icinga/service.hpp"
22 #include "icinga/host.hpp"
23 #include "icinga/checkcommand.hpp"
24 #include "icinga/icingaapplication.hpp"
25 #include "icinga/cib.hpp"
26 #include "icinga/apievents.hpp"
27 #include "remote/messageorigin.hpp"
28 #include "remote/apilistener.hpp"
29 #include "base/objectlock.hpp"
30 #include "base/logger.hpp"
31 #include "base/convert.hpp"
32 #include "base/utility.hpp"
33 #include "base/context.hpp"
34 #include <boost/foreach.hpp>
35
36 using namespace icinga;
37
38 boost::signals2::signal<void (const Checkable::Ptr&, const CheckResult::Ptr&, const MessageOrigin&)> Checkable::OnNewCheckResult;
39 boost::signals2::signal<void (const Checkable::Ptr&, const CheckResult::Ptr&, StateType, const MessageOrigin&)> Checkable::OnStateChange;
40 boost::signals2::signal<void (const Checkable::Ptr&, const CheckResult::Ptr&, std::set<Checkable::Ptr>, const MessageOrigin&)> Checkable::OnReachabilityChanged;
41 boost::signals2::signal<void (const Checkable::Ptr&, NotificationType, const CheckResult::Ptr&, const String&, const String&)> Checkable::OnNotificationsRequested;
42 boost::signals2::signal<void (const Checkable::Ptr&, double, const MessageOrigin&)> Checkable::OnNextCheckChanged;
43 boost::signals2::signal<void (const Checkable::Ptr&, bool, const MessageOrigin&)> Checkable::OnForceNextCheckChanged;
44 boost::signals2::signal<void (const Checkable::Ptr&, bool, const MessageOrigin&)> Checkable::OnForceNextNotificationChanged;
45 boost::signals2::signal<void (const Checkable::Ptr&, bool, const MessageOrigin&)> Checkable::OnEnableActiveChecksChanged;
46 boost::signals2::signal<void (const Checkable::Ptr&, bool, const MessageOrigin&)> Checkable::OnEnablePassiveChecksChanged;
47 boost::signals2::signal<void (const Checkable::Ptr&, bool, const MessageOrigin&)> Checkable::OnEnableNotificationsChanged;
48 boost::signals2::signal<void (const Checkable::Ptr&, bool, const MessageOrigin&)> Checkable::OnEnableFlappingChanged;
49 boost::signals2::signal<void (const Checkable::Ptr&, double, const MessageOrigin&)> Checkable::OnCheckIntervalChanged;
50 boost::signals2::signal<void (const Checkable::Ptr&, double, const MessageOrigin&)> Checkable::OnRetryIntervalChanged;
51 boost::signals2::signal<void (const Checkable::Ptr&, const CheckCommand::Ptr&, const MessageOrigin&)> Checkable::OnCheckCommandChanged;
52 boost::signals2::signal<void (const Checkable::Ptr&, int, const MessageOrigin&)> Checkable::OnMaxCheckAttemptsChanged;
53 boost::signals2::signal<void (const Checkable::Ptr&, const TimePeriod::Ptr&, const MessageOrigin&)> Checkable::OnCheckPeriodChanged;
54 boost::signals2::signal<void (const Checkable::Ptr&, FlappingState)> Checkable::OnFlappingChanged;
55
56 CheckCommand::Ptr Checkable::GetCheckCommand(void) const
57 {
58         String command;
59
60         if (!GetOverrideCheckCommand().IsEmpty())
61                 command = GetOverrideCheckCommand();
62         else
63                 command = GetCheckCommandRaw();
64
65         return CheckCommand::GetByName(command);
66 }
67
68 void Checkable::SetCheckCommand(const CheckCommand::Ptr& command, const MessageOrigin& origin)
69 {
70         SetOverrideCheckCommand(command->GetName());
71
72         OnCheckCommandChanged(this, command, origin);
73 }
74
75 TimePeriod::Ptr Checkable::GetCheckPeriod(void) const
76 {
77         String tp;
78
79         if (!GetOverrideCheckPeriod().IsEmpty())
80                 tp = GetOverrideCheckPeriod();
81         else
82                 tp = GetCheckPeriodRaw();
83
84         return TimePeriod::GetByName(tp);
85 }
86
87 void Checkable::SetCheckPeriod(const TimePeriod::Ptr& tp, const MessageOrigin& origin)
88 {
89         SetOverrideCheckPeriod(tp->GetName());
90
91         OnCheckPeriodChanged(this, tp, origin);
92 }
93
94 double Checkable::GetCheckInterval(void) const
95 {
96         if (!GetOverrideCheckInterval().IsEmpty())
97                 return GetOverrideCheckInterval();
98         else
99                 return GetCheckIntervalRaw();
100 }
101
102 void Checkable::SetCheckInterval(double interval, const MessageOrigin& origin)
103 {
104         SetOverrideCheckInterval(interval);
105
106         OnCheckIntervalChanged(this, interval, origin);
107 }
108
109 double Checkable::GetRetryInterval(void) const
110 {
111         if (!GetOverrideRetryInterval().IsEmpty())
112                 return GetOverrideRetryInterval();
113         else
114                 return GetRetryIntervalRaw();
115 }
116
117 void Checkable::SetRetryInterval(double interval, const MessageOrigin& origin)
118 {
119         SetOverrideRetryInterval(interval);
120
121         OnRetryIntervalChanged(this, interval, origin);
122 }
123
124 void Checkable::SetSchedulingOffset(long offset)
125 {
126         m_SchedulingOffset = offset;
127 }
128
129 long Checkable::GetSchedulingOffset(void)
130 {
131         return m_SchedulingOffset;
132 }
133
134 void Checkable::SetNextCheck(double nextCheck, const MessageOrigin& origin)
135 {
136         SetNextCheckRaw(nextCheck);
137
138         OnNextCheckChanged(this, nextCheck, origin);
139 }
140
141 double Checkable::GetNextCheck(void)
142 {
143         return GetNextCheckRaw();
144 }
145
146 void Checkable::UpdateNextCheck(void)
147 {
148         double interval;
149
150         if (GetStateType() == StateTypeSoft && GetLastCheckResult() != NULL)
151                 interval = GetRetryInterval();
152         else
153                 interval = GetCheckInterval();
154
155         double now = Utility::GetTime();
156         double adj = 0;
157
158         if (interval > 1)
159                 adj = fmod(now * 100 + GetSchedulingOffset(), interval * 100) / 100.0;
160
161         SetNextCheck(now - adj + interval);
162 }
163
164 bool Checkable::HasBeenChecked(void) const
165 {
166         return GetLastCheckResult() != NULL;
167 }
168
169 double Checkable::GetLastCheck(void) const
170 {
171         CheckResult::Ptr cr = GetLastCheckResult();
172         double schedule_end = -1;
173
174         if (cr)
175                 schedule_end = cr->GetScheduleEnd();
176
177         return schedule_end;
178 }
179
180 bool Checkable::GetEnableActiveChecks(void) const
181 {
182         if (!GetOverrideEnableActiveChecks().IsEmpty())
183                 return GetOverrideEnableActiveChecks();
184         else
185                 return GetEnableActiveChecksRaw();
186 }
187
188 void Checkable::SetEnableActiveChecks(bool enabled, const MessageOrigin& origin)
189 {
190         SetOverrideEnableActiveChecks(enabled);
191
192         OnEnableActiveChecksChanged(this, enabled, origin);
193 }
194
195 bool Checkable::GetEnablePassiveChecks(void) const
196 {
197         if (!GetOverrideEnablePassiveChecks().IsEmpty())
198                 return GetOverrideEnablePassiveChecks();
199         else
200                 return GetEnablePassiveChecksRaw();
201 }
202
203 void Checkable::SetEnablePassiveChecks(bool enabled, const MessageOrigin& origin)
204 {
205         SetOverrideEnablePassiveChecks(enabled);
206
207         OnEnablePassiveChecksChanged(this, enabled, origin);
208 }
209
210 bool Checkable::GetForceNextCheck(void) const
211 {
212         return GetForceNextCheckRaw();
213 }
214
215 void Checkable::SetForceNextCheck(bool forced, const MessageOrigin& origin)
216 {
217         SetForceNextCheckRaw(forced);
218
219         OnForceNextCheckChanged(this, forced, origin);
220 }
221
222 int Checkable::GetMaxCheckAttempts(void) const
223 {
224         if (!GetOverrideMaxCheckAttempts().IsEmpty())
225                 return GetOverrideMaxCheckAttempts();
226         else
227                 return GetMaxCheckAttemptsRaw();
228 }
229
230 void Checkable::SetMaxCheckAttempts(int attempts, const MessageOrigin& origin)
231 {
232         SetOverrideMaxCheckAttempts(attempts);
233
234         OnMaxCheckAttemptsChanged(this, attempts, origin);
235 }
236
237 void Checkable::ProcessCheckResult(const CheckResult::Ptr& cr, const MessageOrigin& origin)
238 {
239         {
240                 ObjectLock olock(this);
241                 m_CheckRunning = false;
242         }
243
244         double now = Utility::GetTime();
245
246         if (cr->GetScheduleStart() == 0)
247                 cr->SetScheduleStart(now);
248
249         if (cr->GetScheduleEnd() == 0)
250                 cr->SetScheduleEnd(now);
251
252         if (cr->GetExecutionStart() == 0)
253                 cr->SetExecutionStart(now);
254
255         if (cr->GetExecutionEnd() == 0)
256                 cr->SetExecutionEnd(now);
257
258         if (origin.IsLocal())
259                 cr->SetCheckSource(IcingaApplication::GetInstance()->GetNodeName());
260
261         Endpoint::Ptr command_endpoint = GetCommandEndpoint();
262
263         if (command_endpoint && (Endpoint::GetLocalEndpoint() != command_endpoint) && GetExtension("agent_check")) {
264                 ApiListener::Ptr listener = ApiListener::GetInstance();
265
266                 if (listener) {
267                         Dictionary::Ptr message = ApiEvents::MakeCheckResultMessage(this, cr);
268                         listener->SyncSendMessage(command_endpoint, message);
269                 }
270
271                 return;
272         }
273
274         bool reachable = IsReachable();
275         bool notification_reachable = IsReachable(DependencyNotification);
276
277         ASSERT(!OwnsLock());
278         ObjectLock olock(this);
279
280         CheckResult::Ptr old_cr = GetLastCheckResult();
281         ServiceState old_state = GetStateRaw();
282         StateType old_stateType = GetStateType();
283         long old_attempt = GetCheckAttempt();
284         bool recovery = false;
285
286         if (old_cr && cr->GetExecutionStart() < old_cr->GetExecutionStart())
287                 return;
288
289         /* The ExecuteCheck function already sets the old state, but we need to do it again
290          * in case this was a passive check result. */
291         SetLastStateRaw(old_state);
292         SetLastStateType(old_stateType);
293         SetLastReachable(reachable);
294
295         long attempt = 1;
296
297         std::set<Checkable::Ptr> children = GetChildren();
298
299         if (!old_cr) {
300                 SetStateType(StateTypeHard);
301         } else if (cr->GetState() == ServiceOK) {
302                 if (old_state == ServiceOK && old_stateType == StateTypeSoft) {
303                         SetStateType(StateTypeHard); // SOFT OK -> HARD OK
304                         recovery = true;
305                 }
306
307                 if (old_state != ServiceOK)
308                         recovery = true; // NOT OK -> SOFT/HARD OK
309
310                 ResetNotificationNumbers();
311                 SetLastStateOK(Utility::GetTime());
312
313                 /* update reachability for child objects in OK state */
314                 if (!children.empty())
315                         OnReachabilityChanged(this, cr, children, origin);
316         } else {
317                 if (old_attempt >= GetMaxCheckAttempts()) {
318                         SetStateType(StateTypeHard);
319                 } else if (old_stateType == StateTypeSoft || old_state == ServiceOK) {
320                         SetStateType(StateTypeSoft);
321                         attempt = old_attempt + 1;
322                 } else {
323                         attempt = old_attempt;
324                 }
325
326                 switch (cr->GetState()) {
327                         case ServiceOK:
328                                 /* Nothing to do here. */
329                                 break;
330                         case ServiceWarning:
331                                 SetLastStateWarning(Utility::GetTime());
332                                 break;
333                         case ServiceCritical:
334                                 SetLastStateCritical(Utility::GetTime());
335                                 break;
336                         case ServiceUnknown:
337                                 SetLastStateUnknown(Utility::GetTime());
338                                 break;
339                 }
340
341                 /* update reachability for child objects in NOT-OK state */
342                 if (!children.empty())
343                         OnReachabilityChanged(this, cr, children, origin);
344         }
345
346         if (!reachable)
347                 SetLastStateUnreachable(Utility::GetTime());
348
349         SetCheckAttempt(attempt);
350
351         ServiceState new_state = cr->GetState();
352         SetStateRaw(new_state);
353
354         bool stateChange = (old_state != new_state);
355         if (stateChange) {
356                 SetLastStateChange(now);
357
358                 /* remove acknowledgements */
359                 if (GetAcknowledgement() == AcknowledgementNormal ||
360                     (GetAcknowledgement() == AcknowledgementSticky && new_state == ServiceOK)) {
361                         ClearAcknowledgement();
362                 }
363
364                 /* reschedule direct parents */
365                 BOOST_FOREACH(const Checkable::Ptr& parent, GetParents()) {
366                         if (parent.get() == this)
367                                 continue;
368
369                         ObjectLock olock(parent);
370                         parent->SetNextCheck(Utility::GetTime());
371                 }
372         }
373
374         bool remove_acknowledgement_comments = false;
375
376         if (GetAcknowledgement() == AcknowledgementNone)
377                 remove_acknowledgement_comments = true;
378
379         bool hardChange = (GetStateType() == StateTypeHard && old_stateType == StateTypeSoft);
380
381         if (stateChange && old_stateType == StateTypeHard && GetStateType() == StateTypeHard)
382                 hardChange = true;
383
384         if (GetVolatile())
385                 hardChange = true;
386
387         if (hardChange) {
388                 SetLastHardStateRaw(new_state);
389                 SetLastHardStateChange(now);
390         }
391
392         if (new_state != ServiceOK)
393                 TriggerDowntimes();
394
395         Host::Ptr host;
396         Service::Ptr service;
397         tie(host, service) = GetHostService(this);
398
399         CheckableType checkable_type = CheckableHost;
400         if (service)
401                 checkable_type = CheckableService;
402
403         /* statistics for external tools */
404         Checkable::UpdateStatistics(cr, checkable_type);
405
406         bool in_downtime = IsInDowntime();
407         bool send_notification = hardChange && notification_reachable && !in_downtime && !IsAcknowledged();
408
409         if (!old_cr)
410                 send_notification = false; /* Don't send notifications for the initial state change */
411
412         if (old_state == ServiceOK && old_stateType == StateTypeSoft)
413                 send_notification = false; /* Don't send notifications for SOFT-OK -> HARD-OK. */
414
415         bool send_downtime_notification = (GetLastInDowntime() != in_downtime);
416         SetLastInDowntime(in_downtime);
417
418         olock.Unlock();
419
420         if (remove_acknowledgement_comments)
421                 RemoveCommentsByType(CommentAcknowledgement);
422
423         Dictionary::Ptr vars_after = new Dictionary();
424         vars_after->Set("state", new_state);
425         vars_after->Set("state_type", GetStateType());
426         vars_after->Set("attempt", GetCheckAttempt());
427         vars_after->Set("reachable", reachable);
428
429         if (old_cr)
430                 cr->SetVarsBefore(old_cr->GetVarsAfter());
431
432         cr->SetVarsAfter(vars_after);
433
434         olock.Lock();
435         SetLastCheckResult(cr);
436
437         bool was_flapping, is_flapping;
438
439         was_flapping = IsFlapping();
440         if (GetStateType() == StateTypeHard)
441                 UpdateFlappingStatus(stateChange);
442         is_flapping = IsFlapping();
443
444         olock.Unlock();
445
446 //      Log(LogDebug, "Checkable")
447 //          << "Flapping: Checkable " << GetName()
448 //          << " was: " << (was_flapping)
449 //          << " is: " << is_flapping)
450 //          << " threshold: " << GetFlappingThreshold()
451 //          << "% current: " + GetFlappingCurrent()) << "%.";
452
453         OnNewCheckResult(this, cr, origin);
454
455         /* signal status updates to for example db_ido */
456         OnStateChanged(this);
457
458         String old_state_str = (service ? Service::StateToString(old_state) : Host::StateToString(Host::CalculateState(old_state)));
459         String new_state_str = (service ? Service::StateToString(new_state) : Host::StateToString(Host::CalculateState(new_state)));
460
461         if (hardChange) {
462                 OnStateChange(this, cr, StateTypeHard, origin);
463                 Log(LogNotice, "Checkable")
464                     << "State Change: Checkable " << GetName() << " hard state change from " << old_state_str << " to " << new_state_str << " detected.";
465         } else if (stateChange) {
466                 OnStateChange(this, cr, StateTypeSoft, origin);
467                 Log(LogNotice, "Checkable")
468                     << "State Change: Checkable " << GetName() << " soft state change from " << old_state_str << " to " << new_state_str << " detected.";
469         }
470
471         if (GetStateType() == StateTypeSoft || hardChange || recovery)
472                 ExecuteEventHandler();
473
474         if (send_downtime_notification)
475                 OnNotificationsRequested(this, in_downtime ? NotificationDowntimeStart : NotificationDowntimeEnd, cr, "", "");
476
477         if (!was_flapping && is_flapping) {
478                 OnNotificationsRequested(this, NotificationFlappingStart, cr, "", "");
479
480                 Log(LogNotice, "Checkable")
481                     << "Flapping: Checkable " << GetName() << " started flapping (" << GetFlappingThreshold() << "% < " << GetFlappingCurrent() << "%).";
482                 OnFlappingChanged(this, FlappingStarted);
483         } else if (was_flapping && !is_flapping) {
484                 OnNotificationsRequested(this, NotificationFlappingEnd, cr, "", "");
485
486                 Log(LogNotice, "Checkable")
487                     << "Flapping: Checkable " << GetName() << " stopped flapping (" << GetFlappingThreshold() << "% >= " << GetFlappingCurrent() << "%).";
488                 OnFlappingChanged(this, FlappingStopped);
489         } else if (send_notification)
490                 OnNotificationsRequested(this, recovery ? NotificationRecovery : NotificationProblem, cr, "", "");
491 }
492
493 bool Checkable::IsCheckPending(void) const
494 {
495         ObjectLock olock(this);
496         return m_CheckRunning;
497 }
498
499 void Checkable::ExecuteCheck(const Dictionary::Ptr& resolvedMacros, bool useResolvedMacros)
500 {
501         CONTEXT("Executing check for object '" + GetName() + "'");
502
503         ASSERT(!OwnsLock());
504
505         UpdateNextCheck();
506
507         bool reachable = IsReachable();
508
509         {
510                 ObjectLock olock(this);
511
512                 /* don't run another check if there is one pending */
513                 if (m_CheckRunning)
514                         return;
515
516                 m_CheckRunning = true;
517
518                 SetLastStateRaw(GetStateRaw());
519                 SetLastStateType(GetLastStateType());
520                 SetLastReachable(reachable);
521         }
522
523         /* keep track of scheduling info in case the check type doesn't provide its own information */
524         double scheduled_start = GetNextCheck();
525         double before_check = Utility::GetTime();
526
527         CheckResult::Ptr result = new CheckResult();
528
529         result->SetScheduleStart(scheduled_start);
530         result->SetExecutionStart(before_check);
531
532         Dictionary::Ptr macros;
533         Endpoint::Ptr endpoint = GetCommandEndpoint();
534
535         if (endpoint && !useResolvedMacros)
536                 macros = new Dictionary();
537         else
538                 macros = resolvedMacros;
539
540         GetCheckCommand()->Execute(this, result, macros, useResolvedMacros);
541
542         if (endpoint && !useResolvedMacros) {
543                 if (endpoint->IsConnected()) {
544                         Dictionary::Ptr message = new Dictionary();
545                         message->Set("jsonrpc", "2.0");
546                         message->Set("method", "event::ExecuteCommand");
547
548                         Host::Ptr host;
549                         Service::Ptr service;
550                         tie(host, service) = GetHostService(this);
551
552                         Dictionary::Ptr params = new Dictionary();
553                         message->Set("params", params);
554                         params->Set("command_type", "check_command");
555                         params->Set("command", GetCheckCommand()->GetName());
556                         params->Set("host", host->GetName());
557
558                         if (service)
559                                 params->Set("service", service->GetShortName());
560
561                         params->Set("macros", macros);
562
563                         ApiListener::Ptr listener = ApiListener::GetInstance();
564
565                         if (listener)
566                                 listener->SyncSendMessage(endpoint, message);
567                 } else if (Application::GetInstance()->GetStartTime() < Utility::GetTime() - 30) {
568                         result->SetState(ServiceUnknown);
569                         result->SetOutput("Remote Icinga instance '" + endpoint->GetName() + "' is not connected.");
570                         ProcessCheckResult(result);
571                 }
572
573                 {
574                         ObjectLock olock(this);
575                         m_CheckRunning = false;
576                 }
577         }
578 }
579
580 void Checkable::UpdateStatistics(const CheckResult::Ptr& cr, CheckableType type)
581 {
582         time_t ts = cr->GetScheduleEnd();
583
584         if (type == CheckableHost) {
585                 if (cr->GetActive())
586                         CIB::UpdateActiveHostChecksStatistics(ts, 1);
587                 else
588                         CIB::UpdatePassiveHostChecksStatistics(ts, 1);
589         } else if (type == CheckableService) {
590                 if (cr->GetActive())
591                         CIB::UpdateActiveServiceChecksStatistics(ts, 1);
592                 else
593                         CIB::UpdatePassiveServiceChecksStatistics(ts, 1);
594         } else {
595                 Log(LogWarning, "Checkable", "Unknown checkable type for statistic update.");
596         }
597 }
598
599 double Checkable::CalculateExecutionTime(const CheckResult::Ptr& cr)
600 {
601         if (!cr)
602                 return 0;
603
604         return cr->GetExecutionEnd() - cr->GetExecutionStart();
605 }
606
607 double Checkable::CalculateLatency(const CheckResult::Ptr& cr)
608 {
609         if (!cr)
610                 return 0;
611
612         double latency = (cr->GetScheduleEnd() - cr->GetScheduleStart()) - CalculateExecutionTime(cr);
613
614         if (latency < 0)
615                 latency = 0;
616
617         return latency;
618 }