1 /* Icinga 2 | (c) 2012 Icinga GmbH | GPLv2+ */
3 #include "perfdata/influxdbwriter.hpp"
4 #include "perfdata/influxdbwriter-ti.cpp"
5 #include "remote/url.hpp"
6 #include "icinga/service.hpp"
7 #include "icinga/macroprocessor.hpp"
8 #include "icinga/icingaapplication.hpp"
9 #include "icinga/checkcommand.hpp"
10 #include "base/application.hpp"
11 #include "base/defer.hpp"
12 #include "base/io-engine.hpp"
13 #include "base/tcpsocket.hpp"
14 #include "base/configtype.hpp"
15 #include "base/objectlock.hpp"
16 #include "base/logger.hpp"
17 #include "base/convert.hpp"
18 #include "base/utility.hpp"
19 #include "base/perfdatavalue.hpp"
20 #include "base/stream.hpp"
21 #include "base/json.hpp"
22 #include "base/networkstream.hpp"
23 #include "base/exception.hpp"
24 #include "base/statsfunction.hpp"
25 #include "base/tlsutility.hpp"
26 #include <boost/algorithm/string.hpp>
27 #include <boost/algorithm/string/replace.hpp>
28 #include <boost/asio/ssl/context.hpp>
29 #include <boost/beast/core/flat_buffer.hpp>
30 #include <boost/beast/http/field.hpp>
31 #include <boost/beast/http/message.hpp>
32 #include <boost/beast/http/parser.hpp>
33 #include <boost/beast/http/read.hpp>
34 #include <boost/beast/http/status.hpp>
35 #include <boost/beast/http/string_body.hpp>
36 #include <boost/beast/http/verb.hpp>
37 #include <boost/beast/http/write.hpp>
38 #include <boost/math/special_functions/fpclassify.hpp>
39 #include <boost/regex.hpp>
40 #include <boost/scoped_array.hpp>
45 using namespace icinga;
// Object-derived wrapper constructed from an int. EscapeValue() in this file
// recognises values of this type and serialises their GetValue() with an "i"
// suffix.
// NOTE(review): only part of the class body is visible in this excerpt
// (accessor and member declarations are not shown).
47 class InfluxdbInteger final : public Object
50 DECLARE_PTR_TYPEDEFS(InfluxdbInteger);
52 InfluxdbInteger(int value)
// Registration macros for the InfluxdbWriter type and for
// InfluxdbWriter::StatsFunc as its stats function (the macros themselves are
// defined outside this file).
65 REGISTER_TYPE(InfluxdbWriter);
67 REGISTER_STATSFUNCTION(InfluxdbWriter, &InfluxdbWriter::StatsFunc);
// Config-load hook: chains to the generated base implementation, names the
// work queue after this object and selects the HA mode.
// NOTE(review): the condition guarding the "HA functionality disabled" branch
// is not visible in this excerpt — presumably the enable_ha setting; confirm.
69 void InfluxdbWriter::OnConfigLoaded()
71 ObjectImpl<InfluxdbWriter>::OnConfigLoaded();
// The queue name embeds the object name: "InfluxdbWriter, <name>".
73 m_WorkQueue.SetName("InfluxdbWriter, " + GetName());
76 Log(LogDebug, "InfluxdbWriter")
77 << "HA functionality disabled. Won't pause connection: " << GetName();
79 SetHAMode(HARunEverywhere);
// Stats callback. For every InfluxdbWriter object it collects the work queue
// length, a work queue item rate and the number of buffered data points, and
// publishes them twice: as a per-object Dictionary stored in `status` under
// the "influxdbwriter" key, and as PerfdataValue entries appended to
// `perfdata`.
//
// status   - dictionary receiving the "influxdbwriter" entry.
// perfdata - array receiving three PerfdataValue objects per writer.
85 void InfluxdbWriter::StatsFunc(const Dictionary::Ptr& status, const Array::Ptr& perfdata)
89 for (const InfluxdbWriter::Ptr& influxdbwriter : ConfigType::GetObjectsByType<InfluxdbWriter>()) {
90 size_t workQueueItems = influxdbwriter->m_WorkQueue.GetLength();
// GetTaskCount(60) divided by 60.0 — presumably tasks per second averaged
// over the last 60 seconds; confirm against WorkQueue::GetTaskCount.
91 double workQueueItemRate = influxdbwriter->m_WorkQueue.GetTaskCount(60) / 60.0;
92 size_t dataBufferItems = influxdbwriter->m_DataBuffer.size();
94 nodes.emplace_back(influxdbwriter->GetName(), new Dictionary({
95 { "work_queue_items", workQueueItems },
96 { "work_queue_item_rate", workQueueItemRate },
97 { "data_buffer_items", dataBufferItems }
// Perfdata labels follow the pattern "influxdbwriter_<name>_<metric>".
100 perfdata->Add(new PerfdataValue("influxdbwriter_" + influxdbwriter->GetName() + "_work_queue_items", workQueueItems));
101 perfdata->Add(new PerfdataValue("influxdbwriter_" + influxdbwriter->GetName() + "_work_queue_item_rate", workQueueItemRate));
// NOTE(review): this label says "data_queue_items" while the status key above
// is "data_buffer_items"; both carry dataBufferItems.
102 perfdata->Add(new PerfdataValue("influxdbwriter_" + influxdbwriter->GetName() + "_data_queue_items", dataBufferItems));
105 status->Set("influxdbwriter", new Dictionary(std::move(nodes)));
// Activates the writer: chains to the base Resume(), installs the work queue
// exception callback, starts the periodic flush timer and subscribes to new
// check results.
108 void InfluxdbWriter::Resume()
110 ObjectImpl<InfluxdbWriter>::Resume();
112 Log(LogInformation, "InfluxdbWriter")
113 << "'" << GetName() << "' resumed.";
115 /* Register exception handler for WQ tasks. */
116 m_WorkQueue.SetExceptionCallback(std::bind(&InfluxdbWriter::ExceptionHandler, this, _1));
118 /* Setup timer for periodically flushing m_DataBuffer */
119 m_FlushTimer = new Timer();
120 m_FlushTimer->SetInterval(GetFlushInterval());
121 m_FlushTimer->OnTimerExpired.connect(std::bind(&InfluxdbWriter::FlushTimeout, this));
122 m_FlushTimer->Start();
// Reschedule(0) right after Start() — presumably triggers a first expiry
// immediately instead of after one full interval; confirm against Timer.
123 m_FlushTimer->Reschedule(0);
125 /* Register for new metrics. */
126 Checkable::OnNewCheckResult.connect(std::bind(&InfluxdbWriter::CheckResultHandler, this, _1, _2));
129 /* Pause is equivalent to Stop, but with HA capabilities to resume at runtime. */
// According to the log messages the sequence is: flush pending buffers, join
// outstanding work queue tasks, flush again, then chain to the base Pause().
// NOTE(review): the statements performing the flushes and the join are not
// visible in this excerpt; only the log messages describing them are.
130 void InfluxdbWriter::Pause()
133 Log(LogDebug, "InfluxdbWriter")
134 << "Flushing pending data buffers.";
138 /* Work on the missing tasks. TODO: Find a way to cache them on disk. */
139 Log(LogDebug, "InfluxdbWriter")
140 << "Joining existing WQ tasks.";
144 /* Flush again after the WQ tasks have filled the data buffer. */
145 Log(LogDebug, "InfluxdbWriter")
146 << "Flushing data buffers from WQ tasks.";
150 Log(LogInformation, "InfluxdbWriter")
151 << "'" << GetName() << "' paused.";
153 ObjectImpl<InfluxdbWriter>::Pause();
// Debug guard: asserts that the caller is running on a worker thread of
// m_WorkQueue.
156 void InfluxdbWriter::AssertOnWorkQueue()
158 ASSERT(m_WorkQueue.IsWorkerThread());
// Exception callback for work queue tasks (installed in Resume()). Logs a
// generic critical message and, at debug level, the diagnostic information of
// the exception.
//
// exp - the exception raised by the failed task; moved into
//       DiagnosticInformation().
161 void InfluxdbWriter::ExceptionHandler(boost::exception_ptr exp)
163 Log(LogCritical, "InfluxdbWriter", "Exception during InfluxDB operation: Verify that your backend is operational!");
165 Log(LogDebug, "InfluxdbWriter")
166 << "Exception during InfluxDB operation: " << DiagnosticInformation(std::move(exp));
168 //TODO: Close the connection, if we keep it open.
// Opens a connection to the configured InfluxDB host/port and returns it.
//
// The result is a pair of stream pointers: `first` is populated with an
// AsioTlsStream and `second` with a plain AsioTcpStream. Which of the two is
// created depends on GetSslEnable() — the branching lines are not visible in
// this excerpt, but the connect call below selects `first` when ssl is true
// and `second` otherwise.
//
// Failures while creating the SSL context, connecting, or performing the TLS
// handshake are logged as warnings.
// NOTE(review): what the catch blocks do after logging (presumably rethrow)
// is not visible here.
171 OptionalTlsStream InfluxdbWriter::Connect()
173 Log(LogNotice, "InfluxdbWriter")
174 << "Reconnecting to InfluxDB on host '" << GetHost() << "' port '" << GetPort() << "'.";
176 OptionalTlsStream stream;
177 bool ssl = GetSslEnable();
180 std::shared_ptr<boost::asio::ssl::context> sslContext;
183 sslContext = MakeAsioSslContext(GetSslCert(), GetSslKey(), GetSslCaCert());
184 } catch (const std::exception& ex) {
185 Log(LogWarning, "InfluxdbWriter")
186 << "Unable to create SSL context.";
190 stream.first = std::make_shared<AsioTlsStream>(IoEngine::Get().GetIoContext(), *sslContext, GetHost());
192 stream.second = std::make_shared<AsioTcpStream>(IoEngine::Get().GetIoContext());
// Connect the underlying socket of whichever stream was created.
196 icinga::Connect(ssl ? stream.first->lowest_layer() : stream.second->lowest_layer(), GetHost(), GetPort());
197 } catch (const std::exception& ex) {
198 Log(LogWarning, "InfluxdbWriter")
199 << "Can't connect to InfluxDB on host '" << GetHost() << "' port '" << GetPort() << "'.";
// TLS only: perform the client-side handshake on the TLS layer.
204 auto& tlsStream (stream.first->next_layer());
207 tlsStream.handshake(tlsStream.client);
208 } catch (const std::exception& ex) {
209 Log(LogWarning, "InfluxdbWriter")
210 << "TLS handshake with host '" << GetHost() << "' failed.";
// Return the local by name: it has the function's return type, so it is
// moved (or elided) implicitly; an explicit std::move would inhibit elision.
215 return stream;
// Signal handler for Checkable::OnNewCheckResult (connected in Resume()).
// Does no processing itself; it enqueues CheckResultHandlerWQ on the work
// queue with low priority.
// NOTE(review): lines between the signature and the Enqueue call are not
// visible in this excerpt.
//
// checkable - the host or service the result belongs to.
// cr        - the new check result.
218 void InfluxdbWriter::CheckResultHandler(const Checkable::Ptr& checkable, const CheckResult::Ptr& cr)
223 m_WorkQueue.Enqueue(std::bind(&InfluxdbWriter::CheckResultHandlerWQ, this, checkable, cr), PriorityLow);
// Work queue task that converts one check result into InfluxDB data points.
//
// Steps visible in this excerpt:
//   1. Check that perfdata is enabled globally and for the checkable.
//   2. Resolve macros in the host/service template ("measurement" and "tags").
//   3. For every perfdata entry, build a field set and call SendMetric().
//   4. If metadata sending is enabled, send one additional point with state
//      information and no metric label.
//
// checkable - the host or service the result belongs to.
// cr        - the check result to export.
//
// NOTE(review): several guards, declarations and closing braces are missing
// from this excerpt; comments below only describe what is visible.
226 void InfluxdbWriter::CheckResultHandlerWQ(const Checkable::Ptr& checkable, const CheckResult::Ptr& cr)
230 CONTEXT("Processing check result for '" + checkable->GetName() + "'");
// Perfdata must be enabled both application-wide and on this checkable; the
// action taken otherwise is not visible (presumably an early return).
232 if (!IcingaApplication::GetInstance()->GetEnablePerfdata() || !checkable->GetEnablePerfdata())
236 Service::Ptr service;
237 tie(host, service) = GetHostService(checkable);
// Macro resolvers are consulted in the order they are added here.
239 MacroProcessor::ResolverList resolvers;
241 resolvers.emplace_back("service", service);
242 resolvers.emplace_back("host", host);
243 resolvers.emplace_back("icinga", IcingaApplication::GetInstance());
// All points produced from this result share the check's execution end time.
247 double ts = cr->GetExecutionEnd();
249 // Clone the template and perform an in-place macro expansion of measurement and tag values
250 Dictionary::Ptr tmpl_clean = service ? GetServiceTemplate() : GetHostTemplate();
251 Dictionary::Ptr tmpl = static_pointer_cast<Dictionary>(tmpl_clean->ShallowClone());
252 tmpl->Set("measurement", MacroProcessor::ResolveMacros(tmpl->Get("measurement"), resolvers, cr));
254 Dictionary::Ptr tagsClean = tmpl->Get("tags");
// Build a fresh tags dictionary so the configured template's (shared) tags
// object is not modified by the macro expansion.
256 Dictionary::Ptr tags = new Dictionary();
259 ObjectLock olock(tagsClean);
260 for (const Dictionary::Pair& pair : tagsClean) {
261 String missing_macro;
262 Value value = MacroProcessor::ResolveMacros(pair.second, resolvers, cr, &missing_macro);
// Only tags whose macros all resolved are kept.
264 if (missing_macro.IsEmpty()) {
265 tags->Set(pair.first, value);
270 tmpl->Set("tags", tags);
273 CheckCommand::Ptr checkCommand = checkable->GetCheckCommand();
275 Array::Ptr perfdata = cr->GetPerformanceData();
278 ObjectLock olock(perfdata);
279 for (const Value& val : perfdata) {
280 PerfdataValue::Ptr pdv;
// Entries may already be PerfdataValue objects; others are parsed. Entries
// that fail to parse are logged and ignored.
282 if (val.IsObjectType<PerfdataValue>())
286 pdv = PerfdataValue::Parse(val);
287 } catch (const std::exception&) {
288 Log(LogWarning, "InfluxdbWriter")
289 << "Ignoring invalid perfdata for checkable '"
290 << checkable->GetName() << "' and command '"
291 << checkCommand->GetName() << "' with value: " << val;
296 Dictionary::Ptr fields = new Dictionary();
297 fields->Set("value", pdv->GetValue());
// Thresholds are optional fields. NOTE(review): the per-threshold guards
// are not visible in this excerpt.
299 if (GetEnableSendThresholds()) {
301 fields->Set("crit", pdv->GetCrit());
303 fields->Set("warn", pdv->GetWarn());
305 fields->Set("min", pdv->GetMin());
307 fields->Set("max", pdv->GetMax());
309 if (!pdv->GetUnit().IsEmpty()) {
310 fields->Set("unit", pdv->GetUnit());
313 SendMetric(checkable, tmpl, pdv->GetLabel(), fields, ts);
// Optional metadata point: check state and scheduling information, sent with
// an empty label so SendMetric() adds no "metric" tag.
317 if (GetEnableSendMetadata()) {
319 Service::Ptr service;
320 tie(host, service) = GetHostService(checkable);
322 Dictionary::Ptr fields = new Dictionary();
// "state" comes from the service or from the host; the selecting condition
// is not visible in this excerpt.
325 fields->Set("state", new InfluxdbInteger(service->GetState()));
327 fields->Set("state", new InfluxdbInteger(host->GetState()));
// Values wrapped in InfluxdbInteger are written by EscapeValue() with an "i"
// suffix.
329 fields->Set("current_attempt", new InfluxdbInteger(checkable->GetCheckAttempt()));
330 fields->Set("max_check_attempts", new InfluxdbInteger(checkable->GetMaxCheckAttempts()));
331 fields->Set("state_type", new InfluxdbInteger(checkable->GetStateType()));
332 fields->Set("reachable", checkable->IsReachable());
333 fields->Set("downtime_depth", new InfluxdbInteger(checkable->GetDowntimeDepth()));
334 fields->Set("acknowledgement", new InfluxdbInteger(checkable->GetAcknowledgement()));
335 fields->Set("latency", cr->CalculateLatency());
336 fields->Set("execution_time", cr->CalculateExecutionTime());
338 SendMetric(checkable, tmpl, Empty, fields, ts);
// Escapes a measurement name, tag key, tag value or field key for use in an
// InfluxDB line protocol record.
//
// str - the raw text.
// Returns the text with double quotes, equals signs, commas and spaces
// prefixed by a backslash, and a trailing backslash replaced by '_'.
// An empty input is returned unchanged.
342 String InfluxdbWriter::EscapeKeyOrTagValue(const String& str)
344 // Escape double quotes, equals signs, commas and spaces with a backslash
346 boost::algorithm::replace_all(result, "\"", "\\\"");
347 boost::algorithm::replace_all(result, "=", "\\=");
348 boost::algorithm::replace_all(result, ",", "\\,");
349 boost::algorithm::replace_all(result, " ", "\\ ");
351 // InfluxDB 'feature': although backslashes are allowed in keys they also act
352 // as escape sequences when followed by ',' or ' '. When your tag is like
353 // 'metric=C:\' bad things happen. Backslashes themselves cannot be escaped
354 // and through experimentation they also escape '='. To be safe we replace
355 // trailing backslashes with an underscore.
356 // See https://github.com/influxdata/influxdb/issues/8587 for more info
357 size_t length = result.GetLength();
// Guard against an empty string: length - 1 would wrap around (size_t) and
// index far out of range.
358 if (length > 0 && result[length - 1] == '\\')
359 result[length - 1] = '_';
// Serialises a field value for a line protocol record.
//
// value - the field value.
// Visible cases: InfluxdbInteger objects become "<number>i", booleans become
// "true"/"false", strings are escaped with EscapeKeyOrTagValue() and wrapped
// in double quotes.
// NOTE(review): the handling of other value types (and the return after the
// integer case) is not visible in this excerpt.
364 String InfluxdbWriter::EscapeValue(const Value& value)
366 if (value.IsObjectType<InfluxdbInteger>()) {
367 std::ostringstream os;
368 os << static_cast<InfluxdbInteger::Ptr>(value)->GetValue() << "i";
372 if (value.IsBoolean())
373 return value ? "true" : "false";
375 if (value.IsString())
376 return "\"" + EscapeKeyOrTagValue(value) + "\"";
// Formats one data point as a text record and appends it to m_DataBuffer.
//
// The record is built as: escaped measurement, then ",key=value" for every
// non-empty tag, then ",metric=<label>" when a label is given, then the
// fields as "key=value", then a space and the timestamp.
//
// checkable - only used for the debug log message.
// tmpl      - template with already-resolved "measurement" and "tags".
// label     - perfdata label; empty for metadata points.
// fields    - field names and values; values go through EscapeValue().
// ts        - timestamp; written as an integer (see the cast below).
381 void InfluxdbWriter::SendMetric(const Checkable::Ptr& checkable, const Dictionary::Ptr& tmpl,
382 const String& label, const Dictionary::Ptr& fields, double ts)
384 std::ostringstream msgbuf;
385 msgbuf << EscapeKeyOrTagValue(tmpl->Get("measurement"));
387 Dictionary::Ptr tags = tmpl->Get("tags");
389 ObjectLock olock(tags);
390 for (const Dictionary::Pair& pair : tags) {
391 // Empty macro expansion, no tag
392 if (!pair.second.IsEmpty()) {
393 msgbuf << "," << EscapeKeyOrTagValue(pair.first) << "=" << EscapeKeyOrTagValue(pair.second);
398 // Label may be empty in the case of metadata
399 if (!label.IsEmpty())
400 msgbuf << ",metric=" << EscapeKeyOrTagValue(label);
// NOTE(review): the separators written between the tag section and the
// fields, and between individual fields, are not visible in this excerpt.
407 ObjectLock fieldLock(fields);
408 for (const Dictionary::Pair& pair : fields) {
414 msgbuf << EscapeKeyOrTagValue(pair.first) << "=" << EscapeValue(pair.second);
// The cast drops the fractional part; Flush() sends the batch with
// precision "s".
418 msgbuf << " " << static_cast<unsigned long>(ts);
420 Log(LogDebug, "InfluxdbWriter")
421 << "Checkable '" << checkable->GetName() << "' adds to metric list:'" << msgbuf.str() << "'.";
423 // Buffer the data point
424 m_DataBuffer.emplace_back(msgbuf.str());
426 // Flush if we've buffered too much to prevent excessive memory use
// The buffer size is cast to int for the comparison with GetFlushThreshold();
// the flush call itself is not visible in this excerpt.
427 if (static_cast<int>(m_DataBuffer.size()) >= GetFlushThreshold()) {
428 Log(LogDebug, "InfluxdbWriter")
429 << "Data buffer overflow writing " << m_DataBuffer.size() << " data points";
// Flush timer callback (connected to m_FlushTimer in Resume()). Enqueues
// FlushTimeoutWQ on the work queue with high priority instead of flushing on
// the timer's thread.
439 void InfluxdbWriter::FlushTimeout()
// std::bind, matching every other callback registration in this file.
441 m_WorkQueue.Enqueue(std::bind(&InfluxdbWriter::FlushTimeoutWQ, this), PriorityHigh);
// Work queue task scheduled by FlushTimeout(); logs how many data points are
// currently buffered.
// NOTE(review): the statements around the log message (presumably the actual
// Flush() call) are not visible in this excerpt.
444 void InfluxdbWriter::FlushTimeoutWQ()
448 Log(LogDebug, "InfluxdbWriter")
449 << "Timer expired writing " << m_DataBuffer.size() << " data points";
// Sends all buffered data points to InfluxDB in a single HTTP POST to
// "<scheme>://<host>:<port>/write" and evaluates the response.
//
// Visible behaviour:
//   - Does nothing further when m_DataBuffer is empty (the early exit line is
//     not visible in this excerpt).
//   - Joins the buffered records with newlines and clears the buffer before
//     connecting, so as far as this excerpt shows the points are not kept for
//     a retry when sending fails.
//   - Connection, write and read failures are logged as warnings.
//   - Any status other than 204 No Content is logged; a JSON body's "error"
//     member is logged as critical.
// NOTE(review): the actions following each log message (return/throw) and
// the ssl/non-ssl branching lines are not visible here.
454 void InfluxdbWriter::Flush()
456 namespace beast = boost::beast;
457 namespace http = beast::http;
459 /* Flush can be called from 1) Timeout 2) Threshold 3) on shutdown/reload. */
460 if (m_DataBuffer.empty())
463 Log(LogDebug, "InfluxdbWriter")
464 << "Flushing data buffer to InfluxDB.";
466 String body = boost::algorithm::join(m_DataBuffer, "\n");
467 m_DataBuffer.clear();
469 OptionalTlsStream stream;
473 } catch (const std::exception& ex) {
// Same log facility as every other message in this file ("InfluxdbWriter").
474 Log(LogWarning, "InfluxdbWriter")
475 << "Flush failed, cannot connect to InfluxDB: " << DiagnosticInformation(ex, false);
// Deferred cleanup: shuts down the TLS layer when this scope is left.
// NOTE(review): the guard selecting the TLS case is not visible here.
479 Defer s ([&stream]() {
481 stream.first->next_layer().shutdown();
485 Url::Ptr url = new Url();
486 url->SetScheme(GetSslEnable() ? "https" : "http");
487 url->SetHost(GetHost());
488 url->SetPort(GetPort());
490 std::vector<String> path;
491 path.emplace_back("write");
// Query parameters: target database, second precision and, when configured,
// the credentials.
494 url->AddQueryElement("db", GetDatabase());
495 url->AddQueryElement("precision", "s");
496 if (!GetUsername().IsEmpty())
497 url->AddQueryElement("u", GetUsername());
498 if (!GetPassword().IsEmpty())
499 url->AddQueryElement("p", GetPassword());
// The third constructor argument is Beast's HTTP version encoding
// (10 = HTTP/1.0).
501 http::request<http::string_body> request (http::verb::post, std::string(url->Format(true)), 10);
503 request.set(http::field::user_agent, "Icinga/" + Application::GetAppVersion());
504 request.set(http::field::host, url->GetHost() + ":" + url->GetPort());
506 request.body() = body;
507 request.set(http::field::content_length, request.body().size());
// Write the request over whichever stream Connect() created.
511 http::write(*stream.first, request);
512 stream.first->flush();
514 http::write(*stream.second, request);
515 stream.second->flush();
517 } catch (const std::exception& ex) {
518 Log(LogWarning, "InfluxdbWriter")
519 << "Cannot write to TCP socket on host '" << GetHost() << "' port '" << GetPort() << "'.";
523 http::parser<false, http::string_body> parser;
524 beast::flat_buffer buf;
528 http::read(*stream.first, buf, parser);
530 http::read(*stream.second, buf, parser);
532 } catch (const std::exception& ex) {
533 Log(LogWarning, "InfluxdbWriter")
534 << "Failed to parse HTTP response from host '" << GetHost() << "' port '" << GetPort() << "': " << DiagnosticInformation(ex);
538 auto& response (parser.get());
// Only 204 No Content is treated as the expected response.
540 if (response.result() != http::status::no_content) {
541 Log(LogWarning, "InfluxdbWriter")
542 << "Unexpected response code: " << response.result();
544 auto& contentType (response[http::field::content_type]);
545 if (contentType != "application/json") {
546 Log(LogWarning, "InfluxdbWriter")
547 << "Unexpected Content-Type: " << contentType;
551 Dictionary::Ptr jsonResponse;
552 auto& body (response.body());
555 jsonResponse = JsonDecode(body);
557 Log(LogWarning, "InfluxdbWriter")
558 << "Unable to parse JSON response:\n" << body;
562 String error = jsonResponse->Get("error");
564 Log(LogCritical, "InfluxdbWriter")
565 << "InfluxDB error message:\n" << error;
// Config validator for the "host_template" attribute. Chains to the generated
// validator, then checks that the "measurement" string and every tag value
// are well-formed macro strings.
//
// lvalue - lazily evaluated template dictionary.
// utils  - validation helpers, passed through to the base validator.
// Throws ValidationError naming the offending attribute path.
// NOTE(review): a guard around the tags loop (e.g. a null check) may exist on
// lines not visible in this excerpt.
569 void InfluxdbWriter::ValidateHostTemplate(const Lazy<Dictionary::Ptr>& lvalue, const ValidationUtils& utils)
571 ObjectImpl<InfluxdbWriter>::ValidateHostTemplate(lvalue, utils);
573 String measurement = lvalue()->Get("measurement");
574 if (!MacroProcessor::ValidateMacroString(measurement))
575 BOOST_THROW_EXCEPTION(ValidationError(this, { "host_template", "measurement" }, "Closing $ not found in macro format string '" + measurement + "'."));
577 Dictionary::Ptr tags = lvalue()->Get("tags");
579 ObjectLock olock(tags);
580 for (const Dictionary::Pair& pair : tags) {
581 if (!MacroProcessor::ValidateMacroString(pair.second))
// Close the quote and end the sentence, as the measurement message does.
582 BOOST_THROW_EXCEPTION(ValidationError(this, { "host_template", "tags", pair.first }, "Closing $ not found in macro format string '" + pair.second + "'."));
// Config validator for the "service_template" attribute. Chains to the
// generated validator, then checks that the "measurement" string and every
// tag value are well-formed macro strings.
//
// lvalue - lazily evaluated template dictionary.
// utils  - validation helpers, passed through to the base validator.
// Throws ValidationError naming the offending attribute path.
// NOTE(review): a guard around the tags loop (e.g. a null check) may exist on
// lines not visible in this excerpt.
587 void InfluxdbWriter::ValidateServiceTemplate(const Lazy<Dictionary::Ptr>& lvalue, const ValidationUtils& utils)
589 ObjectImpl<InfluxdbWriter>::ValidateServiceTemplate(lvalue, utils);
591 String measurement = lvalue()->Get("measurement");
592 if (!MacroProcessor::ValidateMacroString(measurement))
593 BOOST_THROW_EXCEPTION(ValidationError(this, { "service_template", "measurement" }, "Closing $ not found in macro format string '" + measurement + "'."));
595 Dictionary::Ptr tags = lvalue()->Get("tags");
597 ObjectLock olock(tags);
598 for (const Dictionary::Pair& pair : tags) {
599 if (!MacroProcessor::ValidateMacroString(pair.second))
// Close the quote and end the sentence, as the measurement message does.
600 BOOST_THROW_EXCEPTION(ValidationError(this, { "service_template", "tags", pair.first }, "Closing $ not found in macro format string '" + pair.second + "'."));