]> granicus.if.org Git - icinga2/blob - lib/icinga/service-check.cpp
Implemented support for disabling notifications.
[icinga2] / lib / icinga / service-check.cpp
1 /******************************************************************************
2  * Icinga 2                                                                   *
3  * Copyright (C) 2012 Icinga Development Team (http://www.icinga.org/)        *
4  *                                                                            *
5  * This program is free software; you can redistribute it and/or              *
6  * modify it under the terms of the GNU General Public License                *
7  * as published by the Free Software Foundation; either version 2             *
8  * of the License, or (at your option) any later version.                     *
9  *                                                                            *
10  * This program is distributed in the hope that it will be useful,            *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
13  * GNU General Public License for more details.                               *
14  *                                                                            *
15  * You should have received a copy of the GNU General Public License          *
16  * along with this program; if not, write to the Free Software Foundation     *
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.             *
18  ******************************************************************************/
19
20 #include "i2-icinga.h"
21
22 using namespace icinga;
23
24 const int Service::DefaultMaxCheckAttempts = 3;
25 const int Service::DefaultCheckInterval = 5 * 60;
26 const int Service::CheckIntervalDivisor = 5;
27
28 signals2::signal<void (const Service::Ptr&, const String&)> Service::OnCheckerChanged;
29 signals2::signal<void (const Service::Ptr&, const Value&)> Service::OnNextCheckChanged;
30
31 Value Service::GetCheckCommand(void) const
32 {
33         return m_CheckCommand;
34 }
35
36 long Service::GetMaxCheckAttempts(void) const
37 {
38         if (m_MaxCheckAttempts.IsEmpty())
39                 return DefaultMaxCheckAttempts;
40
41         return m_MaxCheckAttempts;
42 }
43
44 double Service::GetCheckInterval(void) const
45 {
46         if (m_CheckInterval.IsEmpty())
47                 return DefaultCheckInterval;
48
49         return m_CheckInterval;
50 }
51
52 double Service::GetRetryInterval(void) const
53 {
54         if (m_RetryInterval.IsEmpty())
55                 return GetCheckInterval() / CheckIntervalDivisor;
56
57         return m_RetryInterval;
58 }
59
60 Dictionary::Ptr Service::GetCheckers(void) const
61 {
62         return m_Checkers;
63 }
64
65 void Service::SetSchedulingOffset(long offset)
66 {
67         m_SchedulingOffset = offset;
68 }
69
70 long Service::GetSchedulingOffset(void)
71 {
72         return m_SchedulingOffset;
73 }
74
75 void Service::SetNextCheck(double nextCheck)
76 {
77         m_NextCheck = nextCheck;
78         Touch("next_check");
79 }
80
81 double Service::GetNextCheck(void)
82 {
83         if (m_NextCheck.IsEmpty()) {
84                 UpdateNextCheck();
85
86                 if (m_NextCheck.IsEmpty())
87                         BOOST_THROW_EXCEPTION(runtime_error("Failed to schedule next check."));
88         }
89
90         return m_NextCheck;
91 }
92
93 void Service::UpdateNextCheck(void)
94 {
95         double interval;
96
97         if (GetStateType() == StateTypeSoft)
98                 interval = GetRetryInterval();
99         else
100                 interval = GetCheckInterval();
101
102         double now = Utility::GetTime();
103         double adj = 0;
104
105         if (interval > 1)
106                 adj = fmod(now + GetSchedulingOffset(), interval);
107
108         SetNextCheck(now - adj + interval);
109 }
110
111 void Service::SetCurrentChecker(const String& checker)
112 {
113         m_CurrentChecker = checker;
114         Touch("current_checker");
115 }
116
117 String Service::GetCurrentChecker(void) const
118 {
119         return m_CurrentChecker;
120 }
121
122 void Service::SetCurrentCheckAttempt(long attempt)
123 {
124         m_CheckAttempt = attempt;
125         Touch("check_attempt");
126 }
127
128 long Service::GetCurrentCheckAttempt(void) const
129 {
130         if (m_CheckAttempt.IsEmpty())
131                 return 1;
132
133         return m_CheckAttempt;
134 }
135
136 void Service::SetState(ServiceState state)
137 {
138         m_State = static_cast<long>(state);
139         Touch("state");
140 }
141
142 ServiceState Service::GetState(void) const
143 {
144         if (m_State.IsEmpty())
145                 return StateUnknown;
146
147         int ivalue = static_cast<int>(m_State);
148         return static_cast<ServiceState>(ivalue);
149 }
150
151 void Service::SetStateType(ServiceStateType type)
152 {
153         m_StateType = static_cast<long>(type);
154         Touch("state_type");
155 }
156
157 ServiceStateType Service::GetStateType(void) const
158 {
159         if (m_StateType.IsEmpty())
160                 return StateTypeSoft;
161
162         int ivalue = static_cast<int>(m_StateType);
163         return static_cast<ServiceStateType>(ivalue);
164 }
165
166 void Service::SetLastCheckResult(const Dictionary::Ptr& result)
167 {
168         m_LastResult = result;
169         Touch("last_result");
170 }
171
172 Dictionary::Ptr Service::GetLastCheckResult(void) const
173 {
174         return m_LastResult;
175 }
176
177 void Service::SetLastStateChange(double ts)
178 {
179         m_LastStateChange = ts;
180         Touch("last_state_change");
181 }
182
183 double Service::GetLastStateChange(void) const
184 {
185         if (m_LastStateChange.IsEmpty())
186                 return IcingaApplication::GetInstance()->GetStartTime();
187
188         return m_LastStateChange;
189 }
190
191 void Service::SetLastHardStateChange(double ts)
192 {
193         m_LastHardStateChange = ts;
194         Touch("last_hard_state_change");
195 }
196
197 double Service::GetLastHardStateChange(void) const
198 {
199         if (m_LastHardStateChange.IsEmpty())
200                 return IcingaApplication::GetInstance()->GetStartTime();
201
202         return m_LastHardStateChange;
203 }
204
205 bool Service::GetEnableActiveChecks(void) const
206 {
207         if (m_EnableActiveChecks.IsEmpty())
208                 return true;
209         else
210                 return m_EnableActiveChecks;
211 }
212
213 void Service::SetEnableActiveChecks(bool enabled)
214 {
215         m_EnableActiveChecks = enabled ? 1 : 0;
216         Touch("enable_active_checks");
217 }
218
219 bool Service::GetEnablePassiveChecks(void) const
220 {
221         if (m_EnablePassiveChecks.IsEmpty())
222                 return true;
223         else
224                 return m_EnablePassiveChecks;
225 }
226
227 void Service::SetEnablePassiveChecks(bool enabled)
228 {
229         m_EnablePassiveChecks = enabled ? 1 : 0;
230         Touch("enable_passive_checks");
231 }
232
233 bool Service::GetForceNextCheck(void) const
234 {
235         if (m_ForceNextCheck.IsEmpty())
236                 return false;
237
238         return static_cast<bool>(m_ForceNextCheck);
239 }
240
241 void Service::SetForceNextCheck(bool forced)
242 {
243         m_ForceNextCheck = forced ? 1 : 0;
244         Touch("force_next_check");
245 }
246
247 void Service::ApplyCheckResult(const Dictionary::Ptr& cr)
248 {
249         ServiceState old_state = GetState();
250         ServiceStateType old_stateType = GetStateType();
251         bool hardChange = false;
252         bool recovery;
253
254         long attempt = GetCurrentCheckAttempt();
255
256         if (cr->Get("state") == StateOK) {
257                 if (old_state != StateOK && old_stateType == StateTypeHard)
258                         hardChange = true; // hard recovery
259
260                 if (old_state == StateOK)
261                         SetStateType(StateTypeHard);
262
263                 attempt = 1;
264                 recovery = true;
265         } else {
266                 if (attempt >= GetMaxCheckAttempts()) {
267                         SetStateType(StateTypeHard);
268                         attempt = 1;
269                         hardChange = true;
270                 } else if (GetStateType() == StateTypeSoft || GetState() == StateOK) {
271                         SetStateType(StateTypeSoft);
272                         attempt++;
273                 }
274
275                 recovery = false;
276         }
277
278         SetCurrentCheckAttempt(attempt);
279
280         int state = cr->Get("state");
281         SetState(static_cast<ServiceState>(state));
282
283         SetLastCheckResult(cr);
284
285         double now = Utility::GetTime();
286
287         if (old_state != GetState()) {
288                 SetLastStateChange(now);
289
290                 /* remove acknowledgements */
291                 if (GetAcknowledgement() == AcknowledgementNormal ||
292                     (GetAcknowledgement() == AcknowledgementSticky && GetStateType() == StateTypeHard && GetState() == StateOK)) {
293                         SetAcknowledgement(AcknowledgementNone);
294                         SetAcknowledgementExpiry(0);
295                 }
296
297                 /* reschedule service dependencies */
298                 BOOST_FOREACH(const Service::Ptr& parent, GetParentServices()) {
299                         parent->SetNextCheck(Utility::GetTime());
300                 }
301
302                 /* reschedule host dependencies */
303                 BOOST_FOREACH(const Host::Ptr& parent, GetParentHosts()) {
304                         Service::Ptr service = parent->GetHostCheckService();
305
306                         if (service)
307                                 service->SetNextCheck(Utility::GetTime());
308                 }
309         }
310
311         if (GetState() != StateOK)
312                 TriggerDowntimes();
313
314         if (hardChange) {
315                 SetLastHardStateChange(now);
316
317                 /* Make sure the notification component sees the updated
318                  * state/state_type attributes. */
319                 Flush();
320
321                 if (IsReachable(GetSelf()) && !IsInDowntime() && !IsAcknowledged())
322                         RequestNotifications(recovery ? NotificationRecovery : NotificationProblem);
323         }
324 }
325
326 ServiceState Service::StateFromString(const String& state)
327 {
328         if (state == "OK")
329                 return StateOK;
330         else if (state == "WARNING")
331                 return StateWarning;
332         else if (state == "CRITICAL")
333                 return StateCritical;
334         else if (state == "UNCHECKABLE")
335                 return StateUncheckable;
336         else
337                 return StateUnknown;
338 }
339
340 String Service::StateToString(ServiceState state)
341 {
342         switch (state) {
343                 case StateOK:
344                         return "OK";
345                 case StateWarning:
346                         return "WARNING";
347                 case StateCritical:
348                         return "CRITICAL";
349                 case StateUncheckable:
350                         return "UNCHECKABLE";
351                 case StateUnknown:
352                 default:
353                         return "UNKNOWN";
354         }
355 }
356
357 ServiceStateType Service::StateTypeFromString(const String& type)
358 {
359         if (type == "SOFT")
360                 return StateTypeSoft;
361         else
362                 return StateTypeHard;
363 }
364
365 String Service::StateTypeToString(ServiceStateType type)
366 {
367         if (type == StateTypeSoft)
368                 return "SOFT";
369         else
370                 return "HARD";
371 }
372
373 bool Service::IsAllowedChecker(const String& checker) const
374 {
375         Dictionary::Ptr checkers = GetCheckers();
376
377         if (!checkers)
378                 return true;
379
380         Value pattern;
381         BOOST_FOREACH(tie(tuples::ignore, pattern), checkers) {
382                 if (Utility::Match(pattern, checker))
383                         return true;
384         }
385
386         return false;
387 }
388
389 void Service::BeginExecuteCheck(const Service::Ptr& self, const function<void (void)>& callback)
390 {
391         ObjectLock slock(self);
392
393         /* don't run another check if there is one pending */
394         if (self->m_CurrentTask) {
395                 slock.Unlock();
396
397                 /* we need to call the callback anyway */
398                 callback();
399
400                 return;
401         }
402
403         /* keep track of scheduling info in case the check type doesn't provide its own information */
404         Dictionary::Ptr checkInfo = boost::make_shared<Dictionary>();
405         checkInfo->Set("schedule_start", self->GetNextCheck());
406         checkInfo->Set("execution_start", Utility::GetTime());
407
408         vector<Dictionary::Ptr> macroDicts;
409         macroDicts.push_back(self->GetMacros());
410         macroDicts.push_back(Service::CalculateDynamicMacros(self));
411
412         Value raw_command = self->GetCheckCommand();
413
414         Host::Ptr host = self->GetHost();
415
416         slock.Unlock();
417
418         {
419                 ObjectLock olock(host);
420                 macroDicts.push_back(host->GetMacros());
421         }
422
423         macroDicts.push_back(Host::CalculateDynamicMacros(host));
424
425         IcingaApplication::Ptr app = IcingaApplication::GetInstance();
426
427         {
428                 ObjectLock olock(app);
429                 macroDicts.push_back(app->GetMacros());
430         }
431
432         macroDicts.push_back(IcingaApplication::CalculateDynamicMacros(app));
433
434         Dictionary::Ptr macros = MacroProcessor::MergeMacroDicts(macroDicts);
435
436         checkInfo->Set("macros", macros);
437
438         vector<Value> arguments;
439         arguments.push_back(self);
440         arguments.push_back(macros);
441
442         ScriptTask::Ptr task;
443
444         {
445                 ObjectLock olock(self);
446                 task = self->MakeMethodTask("check", arguments);
447                 self->m_CurrentTask = task;
448         }
449
450         task->Start(boost::bind(&Service::CheckCompletedHandler, self, checkInfo, _1, callback));
451 }
452
453 void Service::CheckCompletedHandler(const Dictionary::Ptr& checkInfo,
454     const ScriptTask::Ptr& task, const function<void (void)>& callback)
455 {
456         checkInfo->Set("execution_end", Utility::GetTime());
457         checkInfo->Set("schedule_end", Utility::GetTime());
458
459         Dictionary::Ptr result;
460
461         try {
462                 Value vresult;
463
464                 {
465                         ObjectLock tlock(task);
466                         vresult = task->GetResult();
467                 }
468
469                 if (vresult.IsObjectType<Dictionary>())
470                         result = vresult;
471         } catch (const exception& ex) {
472                 stringstream msgbuf;
473                 msgbuf << "Exception occured during check for service '"
474                        << GetName() << "': " << diagnostic_information(ex);
475                 String message = msgbuf.str();
476
477                 Logger::Write(LogWarning, "icinga", message);
478
479                 result = boost::make_shared<Dictionary>();
480                 result->Set("state", StateUnknown);
481                 result->Set("output", message);
482         }
483
484         if (result) {
485                 if (!result->Contains("schedule_start"))
486                         result->Set("schedule_start", checkInfo->Get("schedule_start"));
487
488                 if (!result->Contains("schedule_end"))
489                         result->Set("schedule_end", checkInfo->Get("schedule_end"));
490
491                 if (!result->Contains("execution_start"))
492                         result->Set("execution_start", checkInfo->Get("execution_start"));
493
494                 if (!result->Contains("execution_end"))
495                         result->Set("execution_end", checkInfo->Get("execution_end"));
496
497                 if (!result->Contains("macros"))
498                         result->Set("macros", checkInfo->Get("macros"));
499
500                 if (!result->Contains("active"))
501                         result->Set("active", 1);
502
503                 if (!result->Contains("current_checker")) {
504                         EndpointManager::Ptr em = EndpointManager::GetInstance();
505                         ObjectLock olock(em);
506
507                         result->Set("current_checker", em->GetIdentity());
508                 }
509         }
510
511         {
512                 ObjectLock olock(this);
513                 if (result)
514                         ProcessCheckResult(result);
515
516                 m_CurrentTask.reset();
517
518                 /* figure out when the next check is for this service; the call to
519                  * ApplyCheckResult() should've already done this but lets do it again
520                  * just in case there was no check result. */
521                 UpdateNextCheck();
522         }
523
524         callback();
525 }
526
527 void Service::ProcessCheckResult(const Dictionary::Ptr& cr)
528 {
529         ApplyCheckResult(cr);
530
531         Service::UpdateStatistics(cr);
532
533         /* Flush the object so other instances see the service's
534          * new state when they receive the CheckResult message */
535         Flush();
536
537         RequestMessage rm;
538         rm.SetMethod("checker::CheckResult");
539
540         /* TODO: add _old_ state to message */
541         CheckResultMessage params;
542         params.SetService(GetName());
543         params.SetCheckResult(cr);
544
545         rm.SetParams(params);
546
547         EndpointManager::Ptr em = EndpointManager::GetInstance();
548         ObjectLock olock(em);
549         em->SendMulticastMessage(rm);
550 }
551
552 void Service::UpdateStatistics(const Dictionary::Ptr& cr)
553 {
554         time_t ts;
555         Value schedule_end = cr->Get("schedule_end");
556         if (!schedule_end.IsEmpty())
557                 ts = static_cast<time_t>(schedule_end);
558         else
559                 ts = static_cast<time_t>(Utility::GetTime());
560
561         Value active = cr->Get("active");
562         if (active.IsEmpty() || static_cast<long>(active))
563                 CIB::UpdateActiveChecksStatistics(ts, 1);
564         else
565                 CIB::UpdatePassiveChecksStatistics(ts, 1);
566 }
567
568 double Service::CalculateExecutionTime(const Dictionary::Ptr& cr)
569 {
570         ObjectLock olock(cr);
571
572         double execution_start = 0, execution_end = 0;
573
574         if (cr) {
575                 ObjectLock olock(cr);
576
577                 if (!cr->Contains("execution_start") || !cr->Contains("execution_end"))
578                         return 0;
579
580                 execution_start = cr->Get("execution_start");
581                 execution_end = cr->Get("execution_end");
582         }
583
584         return (execution_end - execution_start);
585 }
586
587 double Service::CalculateLatency(const Dictionary::Ptr& cr)
588 {
589         double schedule_start = 0, schedule_end = 0;
590
591         if (cr) {
592                 ObjectLock olock(cr);
593
594                 if (!cr->Contains("schedule_start") || !cr->Contains("schedule_end"))
595                         return 0;
596
597                 schedule_start = cr->Get("schedule_start");
598                 schedule_end = cr->Get("schedule_end");
599         }
600
601         return (schedule_end - schedule_start) - CalculateExecutionTime(cr);
602
603 }