1 /* Icinga 2 | (c) 2012 Icinga GmbH | GPLv2+ */
3 #include "perfdata/graphitewriter.hpp"
4 #include "perfdata/graphitewriter-ti.cpp"
5 #include "icinga/service.hpp"
6 #include "icinga/checkcommand.hpp"
7 #include "icinga/macroprocessor.hpp"
8 #include "icinga/icingaapplication.hpp"
9 #include "base/tcpsocket.hpp"
10 #include "base/configtype.hpp"
11 #include "base/objectlock.hpp"
12 #include "base/logger.hpp"
13 #include "base/convert.hpp"
14 #include "base/utility.hpp"
15 #include "base/perfdatavalue.hpp"
16 #include "base/application.hpp"
17 #include "base/stream.hpp"
18 #include "base/networkstream.hpp"
19 #include "base/exception.hpp"
20 #include "base/statsfunction.hpp"
21 #include <boost/algorithm/string.hpp>
22 #include <boost/algorithm/string/replace.hpp>
/* Make names from the icinga namespace usable unqualified in the rest of this file. */
25 using namespace icinga;
/* Pass GraphiteWriter to the REGISTER_TYPE macro.
 * NOTE(review): presumably registers the class with the config type system - confirm in the macro definition. */
27 REGISTER_TYPE(GraphiteWriter);
/* Pass GraphiteWriter::StatsFunc to the REGISTER_STATSFUNCTION macro under the name GraphiteWriter.
 * NOTE(review): presumably makes StatsFunc the status/perfdata provider for this type - confirm. */
29 REGISTER_STATSFUNCTION(GraphiteWriter, &GraphiteWriter::StatsFunc);
/**
 * Config-load hook: chains to the base class implementation, names the
 * work queue after this object and selects the HA mode.
 */
31 void GraphiteWriter::OnConfigLoaded()
33 ObjectImpl<GraphiteWriter>::OnConfigLoaded();
/* Label the work queue with this object's name. */
35 m_WorkQueue.SetName("GraphiteWriter, " + GetName());
/* NOTE(review): the condition guarding the log message and SetHAMode() call below is not visible here;
 * judging by the log text it applies when HA is disabled - confirm. */
38 Log(LogDebug, "GraphiteWriter")
39 << "HA functionality disabled. Won't pause connection: " << GetName();
41 SetHAMode(HARunEverywhere);
/**
 * Collects per-object statistics for every GraphiteWriter object.
 *
 * @param status Receives a "graphitewriter" entry holding one dictionary per
 *               writer (work_queue_items, work_queue_item_rate, connected).
 * @param perfdata Receives two PerfdataValue entries per writer: the work
 *                 queue length and the work queue item rate.
 */
47 void GraphiteWriter::StatsFunc(const Dictionary::Ptr& status, const Array::Ptr& perfdata)
51 for (const GraphiteWriter::Ptr& graphitewriter : ConfigType::GetObjectsByType<GraphiteWriter>()) {
52 size_t workQueueItems = graphitewriter->m_WorkQueue.GetLength();
/* GetTaskCount(60) divided by 60.0.
 * NOTE(review): presumably the task count of the last 60 seconds, i.e. a per-second rate - confirm. */
53 double workQueueItemRate = graphitewriter->m_WorkQueue.GetTaskCount(60) / 60.0;
/* One status dictionary per writer, keyed by the writer's name. */
55 nodes.emplace_back(graphitewriter->GetName(), new Dictionary({
56 { "work_queue_items", workQueueItems },
57 { "work_queue_item_rate", workQueueItemRate },
58 { "connected", graphitewriter->GetConnected() }
/* Expose the same two numbers as perfdata, with the writer's name embedded in the label. */
61 perfdata->Add(new PerfdataValue("graphitewriter_" + graphitewriter->GetName() + "_work_queue_items", workQueueItems));
62 perfdata->Add(new PerfdataValue("graphitewriter_" + graphitewriter->GetName() + "_work_queue_item_rate", workQueueItemRate));
/* Publish all collected per-writer dictionaries under the "graphitewriter" key. */
65 status->Set("graphitewriter", new Dictionary(std::move(nodes)));
/**
 * Resumes the writer: chains to the base class, installs the work queue
 * exception callback, starts the reconnect timer and subscribes to new
 * check results.
 */
68 void GraphiteWriter::Resume()
70 ObjectImpl<GraphiteWriter>::Resume();
72 Log(LogInformation, "GraphiteWriter")
73 << "'" << GetName() << "' resumed.";
75 /* Register exception handler for WQ tasks. */
76 m_WorkQueue.SetExceptionCallback(std::bind(&GraphiteWriter::ExceptionHandler, this, _1));
78 /* Timer for reconnecting */
79 m_ReconnectTimer = new Timer();
/* NOTE(review): interval unit is presumably seconds - confirm against Timer::SetInterval. */
80 m_ReconnectTimer->SetInterval(10);
81 m_ReconnectTimer->OnTimerExpired.connect(std::bind(&GraphiteWriter::ReconnectTimerHandler, this));
82 m_ReconnectTimer->Start();
/* NOTE(review): Reschedule(0) presumably makes the timer fire right away instead of after the first interval - confirm. */
83 m_ReconnectTimer->Reschedule(0);
85 /* Register event handlers. */
86 Checkable::OnNewCheckResult.connect(std::bind(&GraphiteWriter::CheckResultHandler, this, _1, _2));
/**
 * Pauses the writer: drops the reconnect timer, disconnects and chains to
 * the base class Pause().
 */
89 /* Pause is equivalent to Stop, but with HA capabilities to resume at runtime. */
90 void GraphiteWriter::Pause()
/* Release the reconnect timer so no further reconnects get scheduled through it. */
92 m_ReconnectTimer.reset();
/* NOTE(review): the guarded statements preceding this catch are not visible here; judging by the log text
 * they attempt a connection before the buffers are flushed - confirm. */
96 } catch (const std::exception&) {
97 Log(LogInformation, "GraphiteWriter")
98 << "'" << GetName() << "' paused. Unable to connect, not flushing buffers. Data may be lost on reload.";
/* Failure path: the base class Pause() is still invoked. */
100 ObjectImpl<GraphiteWriter>::Pause();
/* Regular path: disconnect, log, then hand over to the base class. */
105 DisconnectInternal();
107 Log(LogInformation, "GraphiteWriter")
108 << "'" << GetName() << "' paused.";
110 ObjectImpl<GraphiteWriter>::Pause();
/**
 * Asserts that the calling thread is a worker thread of m_WorkQueue.
 */
113 void GraphiteWriter::AssertOnWorkQueue()
115 ASSERT(m_WorkQueue.IsWorkerThread());
/**
 * Exception callback installed on the work queue in Resume().
 *
 * Logs a generic critical message plus the exception's diagnostic
 * information at debug level.
 *
 * @param exp The exception that was raised.
 */
118 void GraphiteWriter::ExceptionHandler(boost::exception_ptr exp)
120 Log(LogCritical, "GraphiteWriter", "Exception during Graphite operation: Verify that your backend is operational!");
122 Log(LogDebug, "GraphiteWriter")
123 << "Exception during Graphite operation: " << DiagnosticInformation(std::move(exp));
/* NOTE(review): the statements executed while connected are not visible here - confirm what happens to the connection. */
125 if (GetConnected()) {
/**
 * Reconnect task; ReconnectTimerHandler() enqueues it onto m_WorkQueue.
 * NOTE(review): body not visible here - presumably delegates to ReconnectInternal(); confirm.
 */
132 void GraphiteWriter::Reconnect()
/**
 * Opens a TCP connection to the configured host/port and stores it in
 * m_Stream as a NetworkStream.
 *
 * Logs the connection attempt, a critical message when connecting throws,
 * and the elapsed time once finished.
 */
144 void GraphiteWriter::ReconnectInternal()
/* Remember when we started so the elapsed time can be logged at the end. */
146 double startTime = Utility::GetTime();
148 CONTEXT("Reconnecting to Graphite '" + GetName() + "'");
150 SetShouldConnect(true);
155 TcpSocket::Ptr socket = new TcpSocket();
157 Log(LogNotice, "GraphiteWriter")
158 << "Reconnecting to Graphite on host '" << GetHost() << "' port '" << GetPort() << "'.";
161 socket->Connect(GetHost(), GetPort());
/* NOTE(review): what follows the log message in this handler (rethrow/return) is not visible here - confirm. */
162 } catch (const std::exception& ex) {
163 Log(LogCritical, "GraphiteWriter")
164 << "Can't connect to Graphite on host '" << GetHost() << "' port '" << GetPort() << "'.";
/* Wrap the socket in a stream; SendMetric() writes through m_Stream. */
168 m_Stream = new NetworkStream(socket);
172 Log(LogInformation, "GraphiteWriter")
173 << "Finished reconnecting to Graphite in " << std::setw(2) << Utility::GetTime() - startTime << " second(s).";
/**
 * Handler connected to m_ReconnectTimer->OnTimerExpired in Resume().
 * Enqueues Reconnect() onto the work queue with PriorityNormal.
 */
176 void GraphiteWriter::ReconnectTimerHandler()
181 m_WorkQueue.Enqueue(std::bind(&GraphiteWriter::Reconnect, this), PriorityNormal);
/**
 * Disconnects by calling DisconnectInternal().
 * NOTE(review): any statements preceding that call are not visible here - confirm.
 */
184 void GraphiteWriter::Disconnect()
188 DisconnectInternal();
/**
 * Disconnect helper called from Pause() and Disconnect().
 * NOTE(review): body not visible here - presumably tears down m_Stream; confirm.
 */
191 void GraphiteWriter::DisconnectInternal()
/**
 * Handler connected to Checkable::OnNewCheckResult in Resume().
 * Defers the actual processing to the work queue.
 *
 * @param checkable The checkable the result belongs to.
 * @param cr The check result.
 */
201 void GraphiteWriter::CheckResultHandler(const Checkable::Ptr& checkable, const CheckResult::Ptr& cr)
/* Bind both arguments and enqueue CheckResultHandlerInternal() as a work queue task. */
206 m_WorkQueue.Enqueue(std::bind(&GraphiteWriter::CheckResultHandlerInternal, this, checkable, cr));
/**
 * Work queue task that turns one check result into Graphite metrics.
 *
 * Resolves the metric prefix from the host or service name template, sends
 * the metadata metrics when GetEnableSendMetadata() is set and then sends
 * the performance data.
 *
 * @param checkable The checkable the result belongs to.
 * @param cr The check result.
 */
209 void GraphiteWriter::CheckResultHandlerInternal(const Checkable::Ptr& checkable, const CheckResult::Ptr& cr)
213 CONTEXT("Processing check result for '" + checkable->GetName() + "'");
215 /* TODO: Deal with missing connection here. Needs refactoring
216 * into parsing the actual performance data and then putting it
217 * into a queue for re-inserting. */
/* Perfdata disabled either globally or for this checkable.
 * NOTE(review): the statement taken in that case is not visible here - presumably an early return; confirm. */
219 if (!IcingaApplication::GetInstance()->GetEnablePerfdata() || !checkable->GetEnablePerfdata())
223 Service::Ptr service;
224 tie(host, service) = GetHostService(checkable);
/* Macro resolvers made available to the name templates. */
226 MacroProcessor::ResolverList resolvers;
228 resolvers.emplace_back("service", service);
229 resolvers.emplace_back("host", host);
230 resolvers.emplace_back("icinga", IcingaApplication::GetInstance());
/* The prefix comes from the service name template or the host name template; every resolved macro value
 * passes through EscapeMacroMetric().
 * NOTE(review): the condition choosing between the two is not visible here - presumably whether `service` is set; confirm. */
235 prefix = MacroProcessor::ResolveMacros(GetServiceNameTemplate(), resolvers, cr, nullptr, std::bind(&GraphiteWriter::EscapeMacroMetric, _1));
237 prefix = MacroProcessor::ResolveMacros(GetHostNameTemplate(), resolvers, cr, nullptr, std::bind(&GraphiteWriter::EscapeMacroMetric, _1));
240 String prefixPerfdata = prefix + ".perfdata";
241 String prefixMetadata = prefix + ".metadata";
/* All metrics of this check result share the execution end time as their timestamp. */
243 double ts = cr->GetExecutionEnd();
245 if (GetEnableSendMetadata()) {
/* "state" is taken from the service or from the host.
 * NOTE(review): the selecting condition is not visible here - confirm. */
247 SendMetric(checkable, prefixMetadata, "state", service->GetState(), ts);
249 SendMetric(checkable, prefixMetadata, "state", host->GetState(), ts);
252 SendMetric(checkable, prefixMetadata, "current_attempt", checkable->GetCheckAttempt(), ts);
253 SendMetric(checkable, prefixMetadata, "max_check_attempts", checkable->GetMaxCheckAttempts(), ts);
254 SendMetric(checkable, prefixMetadata, "state_type", checkable->GetStateType(), ts);
255 SendMetric(checkable, prefixMetadata, "reachable", checkable->IsReachable(), ts);
256 SendMetric(checkable, prefixMetadata, "downtime_depth", checkable->GetDowntimeDepth(), ts);
257 SendMetric(checkable, prefixMetadata, "acknowledgement", checkable->GetAcknowledgement(), ts);
258 SendMetric(checkable, prefixMetadata, "latency", cr->CalculateLatency(), ts);
259 SendMetric(checkable, prefixMetadata, "execution_time", cr->CalculateExecutionTime(), ts);
262 SendPerfdata(checkable, prefixPerfdata, cr, ts);
/**
 * Sends one metric per performance data entry of a check result, plus the
 * crit/warn/min/max values when GetEnableSendThresholds() is set.
 *
 * @param checkable The checkable the result belongs to (used for logging and passed on to SendMetric()).
 * @param prefix Metric path prefix; the escaped label and a suffix are appended.
 * @param cr The check result whose performance data is sent.
 * @param ts Timestamp passed on to every SendMetric() call.
 */
265 void GraphiteWriter::SendPerfdata(const Checkable::Ptr& checkable, const String& prefix, const CheckResult::Ptr& cr, double ts)
267 Array::Ptr perfdata = cr->GetPerformanceData();
/* The check command is only used for the warning message below. */
272 CheckCommand::Ptr checkCommand = checkable->GetCheckCommand();
/* Hold the array's lock while iterating over it. */
274 ObjectLock olock(perfdata);
275 for (const Value& val : perfdata) {
276 PerfdataValue::Ptr pdv;
/* Entries are either PerfdataValue objects already or get parsed via PerfdataValue::Parse().
 * NOTE(review): the statement of the object-type branch is not visible here - presumably assigns val to pdv; confirm. */
278 if (val.IsObjectType<PerfdataValue>())
282 pdv = PerfdataValue::Parse(val);
/* NOTE(review): after the warning the entry is presumably skipped - the statement is not visible here; confirm. */
283 } catch (const std::exception&) {
284 Log(LogWarning, "GraphiteWriter")
285 << "Ignoring invalid perfdata for checkable '"
286 << checkable->GetName() << "' and command '"
287 << checkCommand->GetName() << "' with value: " << val;
/* Labels are escaped with EscapeMetricLabel(), which leaves '.' untouched. */
292 String escapedKey = EscapeMetricLabel(pdv->GetLabel());
294 SendMetric(checkable, prefix, escapedKey + ".value", pdv->GetValue(), ts);
296 if (GetEnableSendThresholds()) {
/* NOTE(review): any per-threshold guards (e.g. skipping unset values) are not visible here - confirm. */
298 SendMetric(checkable, prefix, escapedKey + ".crit", pdv->GetCrit(), ts);
300 SendMetric(checkable, prefix, escapedKey + ".warn", pdv->GetWarn(), ts);
302 SendMetric(checkable, prefix, escapedKey + ".min", pdv->GetMin(), ts);
304 SendMetric(checkable, prefix, escapedKey + ".max", pdv->GetMax(), ts);
/**
 * Formats a single metric as "<prefix>.<name> <value> <timestamp>" and
 * writes it to m_Stream.
 *
 * @param checkable The checkable the metric belongs to (used for the debug log only).
 * @param prefix Metric path prefix.
 * @param name Metric name appended to the prefix, separated by '.'.
 * @param value Metric value, rendered via Convert::ToString().
 * @param ts Timestamp; cast to long, which drops the fractional part.
 */
309 void GraphiteWriter::SendMetric(const Checkable::Ptr& checkable, const String& prefix, const String& name, double value, double ts)
311 std::ostringstream msgbuf;
312 msgbuf << prefix << "." << name << " " << Convert::ToString(value) << " " << static_cast<long>(ts);
314 Log(LogDebug, "GraphiteWriter")
315 << "Checkable '" << checkable->GetName() << "' adds to metric list: '" << msgbuf.str() << "'.";
/* NOTE(review): the statement appending the line terminator after logging is not visible here - confirm. */
317 // do not send \n to debug log
319 String metric = msgbuf.str();
/* Hold m_StreamMutex for the stream access that follows. */
321 boost::mutex::scoped_lock lock(m_StreamMutex);
327 m_Stream->Write(metric.CStr(), metric.GetLength());
/* NOTE(review): what follows the log message on a write failure (rethrow/disconnect) is not visible here - confirm. */
328 } catch (const std::exception& ex) {
329 Log(LogCritical, "GraphiteWriter")
330 << "Cannot write to TCP socket on host '" << GetHost() << "' port '" << GetPort() << "'.";
/**
 * Escapes a string for use in a metric prefix: every space, '.', backslash
 * and '/' in `result` is replaced by '_'.
 * NOTE(review): the declaration and return of `result` are not visible here - presumably a copy of str that is returned; confirm.
 *
 * @param str The string to escape.
 */
336 String GraphiteWriter::EscapeMetric(const String& str)
340 //don't allow '.' in metric prefixes
341 boost::replace_all(result, " ", "_");
342 boost::replace_all(result, ".", "_");
343 boost::replace_all(result, "\\", "_");
344 boost::replace_all(result, "/", "_");
/**
 * Escapes a perfdata label: every space, backslash and '/' in `result` is
 * replaced by '_' and every "::" by '.'. Unlike EscapeMetric(), '.' itself
 * is left untouched.
 * NOTE(review): the declaration and return of `result` are not visible here - presumably a copy of str that is returned; confirm.
 *
 * @param str The label to escape.
 */
349 String GraphiteWriter::EscapeMetricLabel(const String& str)
353 //allow to pass '.' in perfdata labels
354 boost::replace_all(result, " ", "_");
355 boost::replace_all(result, "\\", "_");
356 boost::replace_all(result, "/", "_");
/* Runs after the single-character replacements above, none of which touch ':'. */
357 boost::replace_all(result, "::", ".");
/**
 * Escape callback handed to MacroProcessor::ResolveMacros().
 *
 * For an Array, each element is escaped with EscapeMetric() and the results
 * are joined with '.'; any other value is escaped with EscapeMetric() directly.
 *
 * @param value The resolved macro value.
 * @return The escaped value.
 */
362 Value GraphiteWriter::EscapeMacroMetric(const Value& value)
364 if (value.IsObjectType<Array>()) {
365 Array::Ptr arr = value;
/* Hold the array's lock while iterating over it. */
368 ObjectLock olock(arr);
369 for (const Value& arg : arr) {
370 result.push_back(EscapeMetric(arg));
/* Escaped elements become the '.'-separated segments of one string. */
373 return Utility::Join(new Array(std::move(result)), '.');
375 return EscapeMetric(value);
/**
 * Validates the host_name_template attribute.
 *
 * Runs the base class validator first, then rejects the value when
 * MacroProcessor::ValidateMacroString() returns false for it.
 *
 * @param lvalue Lazily evaluated attribute value.
 * @param utils Validation utilities, passed on to the base class.
 * @throws ValidationError when the macro string check fails.
 */
378 void GraphiteWriter::ValidateHostNameTemplate(const Lazy<String>& lvalue, const ValidationUtils& utils)
380 ObjectImpl<GraphiteWriter>::ValidateHostNameTemplate(lvalue, utils);
382 if (!MacroProcessor::ValidateMacroString(lvalue()))
383 BOOST_THROW_EXCEPTION(ValidationError(this, { "host_name_template" }, "Closing $ not found in macro format string '" + lvalue() + "'."));
/**
 * Validates the service_name_template attribute.
 *
 * Runs the base class validator first, then rejects the value when
 * MacroProcessor::ValidateMacroString() returns false for it.
 *
 * @param lvalue Lazily evaluated attribute value.
 * @param utils Validation utilities, passed on to the base class.
 * @throws ValidationError when the macro string check fails.
 */
386 void GraphiteWriter::ValidateServiceNameTemplate(const Lazy<String>& lvalue, const ValidationUtils& utils)
388 ObjectImpl<GraphiteWriter>::ValidateServiceNameTemplate(lvalue, utils);
390 if (!MacroProcessor::ValidateMacroString(lvalue()))
391 BOOST_THROW_EXCEPTION(ValidationError(this, { "service_name_template" }, "Closing $ not found in macro format string '" + lvalue() + "'."));