]> granicus.if.org Git - icinga2/blob - lib/checker/checkercomponent.cpp
add some object locking to the Dump method (which could theoretically suffer from...
[icinga2] / lib / checker / checkercomponent.cpp
1 /* Icinga 2 | (c) 2012 Icinga GmbH | GPLv2+ */
2
3 #include "checker/checkercomponent.hpp"
4 #include "checker/checkercomponent-ti.cpp"
5 #include "icinga/icingaapplication.hpp"
6 #include "icinga/cib.hpp"
7 #include "remote/apilistener.hpp"
8 #include "base/configuration.hpp"
9 #include "base/configtype.hpp"
10 #include "base/objectlock.hpp"
11 #include "base/utility.hpp"
12 #include "base/perfdatavalue.hpp"
13 #include "base/logger.hpp"
14 #include "base/exception.hpp"
15 #include "base/convert.hpp"
16 #include "base/statsfunction.hpp"
17
18 using namespace icinga;
19
/* Registers the CheckerComponent class through the project's REGISTER_TYPE macro.
 * NOTE(review): the macro is defined outside this file; presumably it adds the
 * class to the runtime type registry - confirm against its definition. */
20 REGISTER_TYPE(CheckerComponent);
21
/* Hands CheckerComponent::StatsFunc to the project's REGISTER_STATSFUNCTION macro
 * under the name CheckerComponent.
 * NOTE(review): presumably this makes StatsFunc part of the status/perfdata
 * collection - confirm against the macro definition. */
22 REGISTER_STATSFUNCTION(CheckerComponent, &CheckerComponent::StatsFunc);
23
24 void CheckerComponent::StatsFunc(const Dictionary::Ptr& status, const Array::Ptr& perfdata)
25 {
26         DictionaryData nodes;
27
28         for (const CheckerComponent::Ptr& checker : ConfigType::GetObjectsByType<CheckerComponent>()) {
29                 unsigned long idle = checker->GetIdleCheckables();
30                 unsigned long pending = checker->GetPendingCheckables();
31
32                 nodes.emplace_back(checker->GetName(), new Dictionary({
33                         { "idle", idle },
34                         { "pending", pending }
35                 }));
36
37                 String perfdata_prefix = "checkercomponent_" + checker->GetName() + "_";
38                 perfdata->Add(new PerfdataValue(perfdata_prefix + "idle", Convert::ToDouble(idle)));
39                 perfdata->Add(new PerfdataValue(perfdata_prefix + "pending", Convert::ToDouble(pending)));
40         }
41
42         status->Set("checkercomponent", new Dictionary(std::move(nodes)));
43 }
44
45 void CheckerComponent::OnConfigLoaded()
46 {
47         ConfigObject::OnActiveChanged.connect(std::bind(&CheckerComponent::ObjectHandler, this, _1));
48         ConfigObject::OnPausedChanged.connect(std::bind(&CheckerComponent::ObjectHandler, this, _1));
49
50         Checkable::OnNextCheckChanged.connect(std::bind(&CheckerComponent::NextCheckChangedHandler, this, _1));
51 }
52
53 void CheckerComponent::Start(bool runtimeCreated)
54 {
55         ObjectImpl<CheckerComponent>::Start(runtimeCreated);
56
57         Log(LogInformation, "CheckerComponent")
58                 << "'" << GetName() << "' started.";
59
60
61         m_Thread = std::thread(std::bind(&CheckerComponent::CheckThreadProc, this));
62
63         m_ResultTimer = new Timer();
64         m_ResultTimer->SetInterval(5);
65         m_ResultTimer->OnTimerExpired.connect(std::bind(&CheckerComponent::ResultTimerHandler, this));
66         m_ResultTimer->Start();
67 }
68
69 void CheckerComponent::Stop(bool runtimeRemoved)
70 {
71         {
72                 boost::mutex::scoped_lock lock(m_Mutex);
73                 m_Stopped = true;
74                 m_CV.notify_all();
75         }
76
77         double wait = 0.0;
78
79         while (GetPendingCheckables() > 0) {
80                 Log(LogDebug, "CheckerComponent")
81                         << "Waiting for running checks (" << GetPendingCheckables()
82                         << ") to finish. Waited for " << wait << " seconds now.";
83
84                 Utility::Sleep(0.1);
85                 wait += 0.1;
86
87                 /* Pick a timeout slightly shorther than the process reload timeout. */
88                 double reloadTimeout = Application::GetReloadTimeout();
89                 double waitMax = reloadTimeout - 30;
90                 if (waitMax <= 0)
91                         waitMax = 1;
92
93                 if (wait > waitMax) {
94                         Log(LogWarning, "CheckerComponent")
95                                 << "Checks running too long for " << wait
96                                 << " seconds, hard shutdown before reload timeout: " << reloadTimeout << ".";
97                         break;
98                 }
99         }
100
101         m_ResultTimer->Stop();
102         m_Thread.join();
103
104         Log(LogInformation, "CheckerComponent")
105                 << "'" << GetName() << "' stopped.";
106
107         ObjectImpl<CheckerComponent>::Stop(runtimeRemoved);
108 }
109
110 void CheckerComponent::CheckThreadProc()
111 {
112         Utility::SetThreadName("Check Scheduler");
113         IcingaApplication::Ptr icingaApp = IcingaApplication::GetInstance();
114
115         boost::mutex::scoped_lock lock(m_Mutex);
116
117         for (;;) {
118                 typedef boost::multi_index::nth_index<CheckableSet, 1>::type CheckTimeView;
119                 CheckTimeView& idx = boost::get<1>(m_IdleCheckables);
120
121                 while (idx.begin() == idx.end() && !m_Stopped)
122                         m_CV.wait(lock);
123
124                 if (m_Stopped)
125                         break;
126
127                 auto it = idx.begin();
128                 CheckableScheduleInfo csi = *it;
129
130                 double wait = csi.NextCheck - Utility::GetTime();
131
132 //#ifdef I2_DEBUG
133 //              Log(LogDebug, "CheckerComponent")
134 //                      << "Pending checks " << Checkable::GetPendingChecks()
135 //                      << " vs. max concurrent checks " << icingaApp->GetMaxConcurrentChecks() << ".";
136 //#endif /* I2_DEBUG */
137
138                 if (Checkable::GetPendingChecks() >= icingaApp->GetMaxConcurrentChecks())
139                         wait = 0.5;
140
141                 if (wait > 0) {
142                         /* Wait for the next check. */
143                         m_CV.timed_wait(lock, boost::posix_time::milliseconds(long(wait * 1000)));
144
145                         continue;
146                 }
147
148                 Checkable::Ptr checkable = csi.Object;
149
150                 m_IdleCheckables.erase(checkable);
151
152                 bool forced = checkable->GetForceNextCheck();
153                 bool check = true;
154
155                 if (!forced) {
156                         if (!checkable->IsReachable(DependencyCheckExecution)) {
157                                 Log(LogNotice, "CheckerComponent")
158                                         << "Skipping check for object '" << checkable->GetName() << "': Dependency failed.";
159                                 check = false;
160                         }
161
162                         Host::Ptr host;
163                         Service::Ptr service;
164                         tie(host, service) = GetHostService(checkable);
165
166                         if (host && !service && (!checkable->GetEnableActiveChecks() || !icingaApp->GetEnableHostChecks())) {
167                                 Log(LogNotice, "CheckerComponent")
168                                         << "Skipping check for host '" << host->GetName() << "': active host checks are disabled";
169                                 check = false;
170                         }
171                         if (host && service && (!checkable->GetEnableActiveChecks() || !icingaApp->GetEnableServiceChecks())) {
172                                 Log(LogNotice, "CheckerComponent")
173                                         << "Skipping check for service '" << service->GetName() << "': active service checks are disabled";
174                                 check = false;
175                         }
176
177                         TimePeriod::Ptr tp = checkable->GetCheckPeriod();
178
179                         if (tp && !tp->IsInside(Utility::GetTime())) {
180                                 Log(LogNotice, "CheckerComponent")
181                                         << "Skipping check for object '" << checkable->GetName()
182                                         << "': not in check period '" << tp->GetName() << "'";
183                                 check = false;
184                         }
185                 }
186
187                 /* reschedule the checkable if checks are disabled */
188                 if (!check) {
189                         m_IdleCheckables.insert(GetCheckableScheduleInfo(checkable));
190                         lock.unlock();
191
192                         Log(LogDebug, "CheckerComponent")
193                                 << "Checks for checkable '" << checkable->GetName() << "' are disabled. Rescheduling check.";
194
195                         checkable->UpdateNextCheck();
196
197                         lock.lock();
198
199                         continue;
200                 }
201
202
203                 csi = GetCheckableScheduleInfo(checkable);
204
205                 Log(LogDebug, "CheckerComponent")
206                         << "Scheduling info for checkable '" << checkable->GetName() << "' ("
207                         << Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", checkable->GetNextCheck()) << "): Object '"
208                         << csi.Object->GetName() << "', Next Check: "
209                         << Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", csi.NextCheck) << "(" << csi.NextCheck << ").";
210
211                 m_PendingCheckables.insert(csi);
212
213                 lock.unlock();
214
215                 if (forced) {
216                         ObjectLock olock(checkable);
217                         checkable->SetForceNextCheck(false);
218                 }
219
220                 Log(LogDebug, "CheckerComponent")
221                         << "Executing check for '" << checkable->GetName() << "'";
222
223                 Checkable::IncreasePendingChecks();
224
225                 Utility::QueueAsyncCallback(std::bind(&CheckerComponent::ExecuteCheckHelper, CheckerComponent::Ptr(this), checkable));
226
227                 lock.lock();
228         }
229 }
230
231 void CheckerComponent::ExecuteCheckHelper(const Checkable::Ptr& checkable)
232 {
233         try {
234                 checkable->ExecuteCheck();
235         } catch (const std::exception& ex) {
236                 CheckResult::Ptr cr = new CheckResult();
237                 cr->SetState(ServiceUnknown);
238
239                 String output = "Exception occurred while checking '" + checkable->GetName() + "': " + DiagnosticInformation(ex);
240                 cr->SetOutput(output);
241
242                 double now = Utility::GetTime();
243                 cr->SetScheduleStart(now);
244                 cr->SetScheduleEnd(now);
245                 cr->SetExecutionStart(now);
246                 cr->SetExecutionEnd(now);
247
248                 checkable->ProcessCheckResult(cr);
249
250                 Log(LogCritical, "checker", output);
251         }
252
253         Checkable::DecreasePendingChecks();
254
255         {
256                 boost::mutex::scoped_lock lock(m_Mutex);
257
258                 /* remove the object from the list of pending objects; if it's not in the
259                  * list this was a manual (i.e. forced) check and we must not re-add the
260                  * object to the list because it's already there. */
261                 auto it = m_PendingCheckables.find(checkable);
262
263                 if (it != m_PendingCheckables.end()) {
264                         m_PendingCheckables.erase(it);
265
266                         if (checkable->IsActive())
267                                 m_IdleCheckables.insert(GetCheckableScheduleInfo(checkable));
268
269                         m_CV.notify_all();
270                 }
271         }
272
273         Log(LogDebug, "CheckerComponent")
274                 << "Check finished for object '" << checkable->GetName() << "'";
275 }
276
277 void CheckerComponent::ResultTimerHandler()
278 {
279         std::ostringstream msgbuf;
280
281         {
282                 boost::mutex::scoped_lock lock(m_Mutex);
283
284                 msgbuf << "Pending checkables: " << m_PendingCheckables.size() << "; Idle checkables: " << m_IdleCheckables.size() << "; Checks/s: "
285                         << (CIB::GetActiveHostChecksStatistics(60) + CIB::GetActiveServiceChecksStatistics(60)) / 60.0;
286         }
287
288         Log(LogNotice, "CheckerComponent", msgbuf.str());
289 }
290
291 void CheckerComponent::ObjectHandler(const ConfigObject::Ptr& object)
292 {
293         Checkable::Ptr checkable = dynamic_pointer_cast<Checkable>(object);
294
295         if (!checkable)
296                 return;
297
298         Zone::Ptr zone = Zone::GetByName(checkable->GetZoneName());
299         bool same_zone = (!zone || Zone::GetLocalZone() == zone);
300
301         {
302                 boost::mutex::scoped_lock lock(m_Mutex);
303
304                 if (object->IsActive() && !object->IsPaused() && same_zone) {
305                         if (m_PendingCheckables.find(checkable) != m_PendingCheckables.end())
306                                 return;
307
308                         m_IdleCheckables.insert(GetCheckableScheduleInfo(checkable));
309                 } else {
310                         m_IdleCheckables.erase(checkable);
311                         m_PendingCheckables.erase(checkable);
312                 }
313
314                 m_CV.notify_all();
315         }
316 }
317
318 CheckableScheduleInfo CheckerComponent::GetCheckableScheduleInfo(const Checkable::Ptr& checkable)
319 {
320         CheckableScheduleInfo csi;
321         csi.Object = checkable;
322         csi.NextCheck = checkable->GetNextCheck();
323         return csi;
324 }
325
326 void CheckerComponent::NextCheckChangedHandler(const Checkable::Ptr& checkable)
327 {
328         boost::mutex::scoped_lock lock(m_Mutex);
329
330         /* remove and re-insert the object from the set in order to force an index update */
331         typedef boost::multi_index::nth_index<CheckableSet, 0>::type CheckableView;
332         CheckableView& idx = boost::get<0>(m_IdleCheckables);
333
334         auto it = idx.find(checkable);
335
336         if (it == idx.end())
337                 return;
338
339         idx.erase(checkable);
340
341         CheckableScheduleInfo csi = GetCheckableScheduleInfo(checkable);
342         idx.insert(csi);
343
344         m_CV.notify_all();
345 }
346
347 unsigned long CheckerComponent::GetIdleCheckables()
348 {
349         boost::mutex::scoped_lock lock(m_Mutex);
350
351         return m_IdleCheckables.size();
352 }
353
354 unsigned long CheckerComponent::GetPendingCheckables()
355 {
356         boost::mutex::scoped_lock lock(m_Mutex);
357
358         return m_PendingCheckables.size();
359 }