]> granicus.if.org Git - pdns/commitdiff
Added Prometheus stats endpoint. Patch by Kai Storbeck <kai@xs4all.nl>
authorPavel Odintsov <pavel@cloudflare.com>
Thu, 30 Aug 2018 15:21:54 +0000 (16:21 +0100)
committerPavel Odintsov <pavel@cloudflare.com>
Thu, 30 Aug 2018 18:21:12 +0000 (19:21 +0100)
pdns/dnsdist-web.cc
pdns/dnsdistdist/docs/guides/webserver.rst

index 16ff2ff2eb68c5da22992d10ff83b5d190627a2e..f1d11693f7c90c84aa30bd86bb606d01593bfea7 100644 (file)
@@ -128,7 +128,7 @@ static bool isAnAPIRequestAllowedWithWebAuth(const YaHTTP::Request& req)
 
 static bool isAStatsRequest(const YaHTTP::Request& req)
 {
-  return req.url.path == "/jsonstat";
+  return req.url.path == "/jsonstat" || req.url.path == "/prometheus"; // true when the request path is either statistics endpoint
 }
 
 static bool compareAuthorization(const YaHTTP::Request& req, const string &expected_password, const string& expectedApiKey)
@@ -386,6 +386,89 @@ static void connectionThread(int sock, ComboAddress remote, string password, str
         resp.status=404;
       }
     }
+    else if(req.url.path=="/prometheus") {
+        handleCORS(req, resp);
+        resp.status=200;
+
+        ostringstream str;
+        for(const auto& e : g_stats.entries) {
+          string metricName = "dnsdist_main_" + std::get<0>(e);
+          boost::replace_all(metricName, "-", "_");
+
+          // for these we have the help and types encoded in the sources:
+          str<<"# HELP "<<metricName<<' '<< std::get<3>(e)<<"\n";
+          str<<"# TYPE "<<metricName<<' '<< std::get<2>(e)<<"\n";
+          str<<metricName<<' ';
+          if(const auto& val = boost::get<DNSDistStats::stat_t*>(&std::get<1>(e)))
+            str<<(*val)->load();
+          else if (const auto& dval = boost::get<double*>(&std::get<1>(e)))
+            str<<**dval;
+          else
+            str<<(*boost::get<DNSDistStats::statfunction_t>(&std::get<1>(e)))(std::get<0>(e));
+          str<<"\n";
+        }
+        const auto states = g_dstates.getCopy();
+        const string statesbase = "dnsdist_main_servers_";
+        for(const auto& state : states) {
+          string serverName = state->name.empty() ? (state->remote.toString() + ":" + std::to_string(state->remote.getPort())) : state->getName();
+          boost::replace_all(serverName, ".", "_");
+          const string label = "{server=\"" + serverName + "\"}";
+          str<<statesbase<<"queries"<<label<<' '<< state->queries.load() <<"\n";
+          str<<statesbase<<"drops"<<label<<' '<< state->reuseds.load() << "\n";
+          str<<statesbase<<"latency"<<label<<' '<< state->latencyUsec/1000.0 << "\n";
+          str<<statesbase<<"senderrors"<<label<<' '<< state->sendErrors.load() << "\n";
+          str<<statesbase<<"outstanding"<<label<<' '<< state->outstanding.load() << "\n";
+        }
+        for(const auto& front : g_frontends) {
+          if (front->udpFD == -1 && front->tcpFD == -1)
+            continue;
+
+          string frontName = front->local.toString() + ":" + std::to_string(front->local.getPort());
+          boost::replace_all(frontName, ".", "_");
+          string proto = (front->udpFD >= 0 ? "udp" : "tcp");
+          str<<"dnsdist_main_frontend_queries{frontend=\""<<frontName<<"\",proto=\""<<proto<<"\"} "<< front->queries.load() << "\n";
+        }
+        const auto localPools = g_pools.getCopy();
+        const string cachebase = "dnsdist_pool_";
+        for (const auto& entry : localPools) {
+          string poolName = entry.first;
+          boost::replace_all(poolName, ".", "_");
+          if (poolName.empty()) {
+            poolName = "_default_";
+          }
+          const string label = "{pool=\"" + poolName + "\"}";
+          const std::shared_ptr<ServerPool> pool = entry.second;
+          str<<"dnsdist_main_pools_servers"<<label<< ' ' << pool->servers.size() <<"\n";
+          if (pool->packetCache != nullptr) {
+            const auto& cache = pool->packetCache;
+            str<<cachebase<<"cache_size"<<label << ' ' << cache->getMaxEntries() << "\n";
+            str<<cachebase<<"cache_entries"<<label << ' ' << cache->getEntriesCount() << "\n";
+            str<<cachebase<<"cache_hits"<<label << ' ' << cache->getHits() << "\n";
+            str<<cachebase<<"cache_misses"<<label << ' ' << cache->getMisses() << "\n";
+            str<<cachebase<<"cache_deferred_inserts"<<label << ' ' << cache->getDeferredInserts() << "\n";
+            str<<cachebase<<"cache_deferred_lookups"<<label << ' ' << cache->getDeferredLookups() << "\n";
+            str<<cachebase<<"cache_lookup_collisions"<<label << ' ' << cache->getLookupCollisions() << "\n";
+            str<<cachebase<<"cache_insert_collisions"<<label << ' ' << cache->getInsertCollisions() << "\n";
+            str<<cachebase<<"cache_ttl_too_shorts"<<label << ' ' << cache->getTTLTooShorts() << "\n";
+          }
+        }
+
+        {
+          WriteLock wl(&g_qcount.queryLock);
+          std::string qname;
+          const string qnamebase = "dnsdist_querycount_queries";
+          for(auto &record: g_qcount.records) {
+            qname = record.first;
+            boost::replace_all(qname, ".", "_");
+           const string label = "{qname=\"" + qname + "\"}";
+            str<<qnamebase<<label<<' '<<record.second<<"\n";
+          }
+          g_qcount.records.clear();
+        }
+        resp.body=str.str();
+        resp.headers["Content-Type"] = "text/plain";
+    }
+
     else if(req.url.path=="/api/v1/servers/localhost") {
       handleCORS(req, resp);
       resp.status=200;
index 8e8bf9aeba6d0ea633d9b9cd71d76250142223ea..a8e73a1d20143d3a1594d6caaa1c04e886ada567 100644 (file)
@@ -99,6 +99,184 @@ URL Endpoints
 
   :query command: one of ``stats``, ``dynblocklist`` or ``ebpfblocklist``
 
+.. http:get:: /prometheus
+
+  Get statistics from dnsdist in `Prometheus <https://prometheus.io>`_ format.
+
+  **Example request**:
+
+   .. sourcecode:: http
+
+      GET /prometheus
+
+  **Example response**:
+
+   .. sourcecode:: http
+
+      HTTP/1.1 200 OK
+      Transfer-Encoding: chunked
+      Content-Security-Policy: default-src 'self'; style-src 'self' 'unsafe-inline'
+      Content-Type: text/plain
+      X-Content-Type-Options: nosniff
+      X-Frame-Options: deny
+      X-Permitted-Cross-Domain-Policies: none
+      X-Xss-Protection: 1; mode=block
+
+
+      # HELP dnsdist_main_responses Number of responses received from backends
+      # TYPE dnsdist_main_responses counter
+      dnsdist_main_responses 0
+      # HELP dnsdist_main_servfail_responses Number of SERVFAIL answers received from backends
+      # TYPE dnsdist_main_servfail_responses counter
+      dnsdist_main_servfail_responses 0
+      # HELP dnsdist_main_queries Number of received queries
+      # TYPE dnsdist_main_queries counter
+      dnsdist_main_queries 0
+      # HELP dnsdist_main_acl_drops Number of packets dropped because of the ACL
+      # TYPE dnsdist_main_acl_drops counter
+      dnsdist_main_acl_drops 0
+      # HELP dnsdist_main_rule_drop Number of queries dropped because of a rule
+      # TYPE dnsdist_main_rule_drop counter
+      dnsdist_main_rule_drop 0
+      # HELP dnsdist_main_rule_nxdomain Number of NXDomain answers returned because of a rule
+      # TYPE dnsdist_main_rule_nxdomain counter
+      dnsdist_main_rule_nxdomain 0
+      # HELP dnsdist_main_rule_refused Number of Refused answers returned because of a rule
+      # TYPE dnsdist_main_rule_refused counter
+      dnsdist_main_rule_refused 0
+      # HELP dnsdist_main_rule_servfail Number of SERVFAIL answers received because of a rule
+      # TYPE dnsdist_main_rule_servfail counter
+      dnsdist_main_rule_servfail 0
+      # HELP dnsdist_main_self_answered Number of self-answered responses
+      # TYPE dnsdist_main_self_answered counter
+      dnsdist_main_self_answered 0
+      # HELP dnsdist_main_downstream_timeouts Number of queries not answered in time by a backend
+      # TYPE dnsdist_main_downstream_timeouts counter
+      dnsdist_main_downstream_timeouts 0
+      # HELP dnsdist_main_downstream_send_errors Number of errors when sending a query to a backend
+      # TYPE dnsdist_main_downstream_send_errors counter
+      dnsdist_main_downstream_send_errors 0
+      # HELP dnsdist_main_trunc_failures Number of errors encountered while truncating an answer
+      # TYPE dnsdist_main_trunc_failures counter
+      dnsdist_main_trunc_failures 0
+      # HELP dnsdist_main_no_policy Number of queries dropped because no server was available
+      # TYPE dnsdist_main_no_policy counter
+      dnsdist_main_no_policy 0
+      # HELP dnsdist_main_latency0_1 Number of queries answered in less than 1ms
+      # TYPE dnsdist_main_latency0_1 counter
+      dnsdist_main_latency0_1 0
+      # HELP dnsdist_main_latency1_10 Number of queries answered in 1-10 ms
+      # TYPE dnsdist_main_latency1_10 counter
+      dnsdist_main_latency1_10 0
+      # HELP dnsdist_main_latency10_50 Number of queries answered in 10-50 ms
+      # TYPE dnsdist_main_latency10_50 counter
+      dnsdist_main_latency10_50 0
+      # HELP dnsdist_main_latency50_100 Number of queries answered in 50-100 ms
+      # TYPE dnsdist_main_latency50_100 counter
+      dnsdist_main_latency50_100 0
+      # HELP dnsdist_main_latency100_1000 Number of queries answered in 100-1000 ms
+      # TYPE dnsdist_main_latency100_1000 counter
+      dnsdist_main_latency100_1000 0
+      # HELP dnsdist_main_latency_slow Number of queries answered in more than 1 second
+      # TYPE dnsdist_main_latency_slow counter
+      dnsdist_main_latency_slow 0
+      # HELP dnsdist_main_latency_avg100 Average response latency in microseconds of the last 100 packets
+      # TYPE dnsdist_main_latency_avg100 gauge
+      dnsdist_main_latency_avg100 0
+      # HELP dnsdist_main_latency_avg1000 Average response latency in microseconds of the last 1000 packets
+      # TYPE dnsdist_main_latency_avg1000 gauge
+      dnsdist_main_latency_avg1000 0
+      # HELP dnsdist_main_latency_avg10000 Average response latency in microseconds of the last 10000 packets
+      # TYPE dnsdist_main_latency_avg10000 gauge
+      dnsdist_main_latency_avg10000 0
+      # HELP dnsdist_main_latency_avg1000000 Average response latency in microseconds of the last 1000000 packets
+      # TYPE dnsdist_main_latency_avg1000000 gauge
+      dnsdist_main_latency_avg1000000 0
+      # HELP dnsdist_main_uptime Uptime of the dnsdist process in seconds
+      # TYPE dnsdist_main_uptime gauge
+      dnsdist_main_uptime 42
+      # HELP dnsdist_main_real_memory_usage Current memory usage in bytes
+      # TYPE dnsdist_main_real_memory_usage gauge
+      dnsdist_main_real_memory_usage 11292672
+      # HELP dnsdist_main_noncompliant_queries Number of queries dropped as non-compliant
+      # TYPE dnsdist_main_noncompliant_queries counter
+      dnsdist_main_noncompliant_queries 0
+      # HELP dnsdist_main_noncompliant_responses Number of answers from a backend dropped as non-compliant
+      # TYPE dnsdist_main_noncompliant_responses counter
+      dnsdist_main_noncompliant_responses 0
+      # HELP dnsdist_main_rdqueries Number of received queries with the recursion desired bit set
+      # TYPE dnsdist_main_rdqueries counter
+      dnsdist_main_rdqueries 0
+      # HELP dnsdist_main_empty_queries Number of empty queries received from clients
+      # TYPE dnsdist_main_empty_queries counter
+      dnsdist_main_empty_queries 0
+      # HELP dnsdist_main_cache_hits Number of times an answer was retrieved from cache
+      # TYPE dnsdist_main_cache_hits counter
+      dnsdist_main_cache_hits 0
+      # HELP dnsdist_main_cache_misses Number of times an answer not found in the cache
+      # TYPE dnsdist_main_cache_misses counter
+      dnsdist_main_cache_misses 0
+      # HELP dnsdist_main_cpu_user_msec Milliseconds spent by dnsdist in the user state
+      # TYPE dnsdist_main_cpu_user_msec counter
+      dnsdist_main_cpu_user_msec 58
+      # HELP dnsdist_main_cpu_sys_msec Milliseconds spent by dnsdist in the system state
+      # TYPE dnsdist_main_cpu_sys_msec counter
+      dnsdist_main_cpu_sys_msec 35
+      # HELP dnsdist_main_fd_usage Number of currently used file descriptors
+      # TYPE dnsdist_main_fd_usage gauge
+      dnsdist_main_fd_usage 18
+      # HELP dnsdist_main_dyn_blocked Number of queries dropped because of a dynamic block
+      # TYPE dnsdist_main_dyn_blocked counter
+      dnsdist_main_dyn_blocked 0
+      # HELP dnsdist_main_dyn_block_nmg_size Number of dynamic blocks entries
+      # TYPE dnsdist_main_dyn_block_nmg_size gauge
+      dnsdist_main_dyn_block_nmg_size 0
+      dnsdist_main_servers_queries{server="9_9_9_9:53"} 0
+      dnsdist_main_servers_drops{server="9_9_9_9:53"} 0
+      dnsdist_main_servers_latency{server="9_9_9_9:53"} 0
+      dnsdist_main_servers_senderrors{server="9_9_9_9:53"} 0
+      dnsdist_main_servers_outstanding{server="9_9_9_9:53"} 0
+      dnsdist_main_servers_queries{server="8_8_8_8:53"} 0
+      dnsdist_main_servers_drops{server="8_8_8_8:53"} 0
+      dnsdist_main_servers_latency{server="8_8_8_8:53"} 0
+      dnsdist_main_servers_senderrors{server="8_8_8_8:53"} 0
+      dnsdist_main_servers_outstanding{server="8_8_8_8:53"} 0
+      dnsdist_main_servers_queries{server="::1:53"} 0
+      dnsdist_main_servers_drops{server="::1:53"} 0
+      dnsdist_main_servers_latency{server="::1:53"} 0
+      dnsdist_main_servers_senderrors{server="::1:53"} 0
+      dnsdist_main_servers_outstanding{server="::1:53"} 0
+      dnsdist_main_servers_queries{server="194_109_6_66:53"} 0
+      dnsdist_main_servers_drops{server="194_109_6_66:53"} 0
+      dnsdist_main_servers_latency{server="194_109_6_66:53"} 0
+      dnsdist_main_servers_senderrors{server="194_109_6_66:53"} 0
+      dnsdist_main_servers_outstanding{server="194_109_6_66:53"} 0
+      dnsdist_main_frontend_queries{frontend="127_0_0_1:5300",proto="udp"} 0
+      dnsdist_main_frontend_queries{frontend="127_0_0_1:5300",proto="tcp"} 0
+      dnsdist_main_pools_servers{pool="_default_"} 4
+      dnsdist_pool_cache_size{pool="_default_"} 1000
+      dnsdist_pool_cache_entries{pool="_default_"} 0
+      dnsdist_pool_cache_hits{pool="_default_"} 0
+      dnsdist_pool_cache_misses{pool="_default_"} 0
+      dnsdist_pool_cache_deferred_inserts{pool="_default_"} 0
+      dnsdist_pool_cache_deferred_lookups{pool="_default_"} 0
+      dnsdist_pool_cache_lookup_collisions{pool="_default_"} 0
+      dnsdist_pool_cache_insert_collisions{pool="_default_"} 0
+      dnsdist_pool_cache_ttl_too_shorts{pool="_default_"} 0
+
+  **Example Prometheus configuration**:
+
+   This is just the scrape job description; for details, see the Prometheus documentation.
+
+   .. sourcecode:: yaml
+
+      job_name: dnsdist
+      scrape_interval: 10s
+      scrape_timeout: 2s
+      metrics_path: /prometheus
+      basic_auth:
+        username: dontcare
+        password: yoursecret
+
+
 .. http:get:: /api/v1/servers/localhost
 
   Get a quick overview of several parameters.