]> granicus.if.org Git - icinga2/commitdiff
Improve DB IDO HA failover behaviour
authorMichael Friedrich <michael.friedrich@icinga.com>
Fri, 29 Mar 2019 14:53:56 +0000 (15:53 +0100)
committerMichael Friedrich <michael.friedrich@icinga.com>
Mon, 1 Apr 2019 06:50:00 +0000 (08:50 +0200)
- Decrease Object Authority updates to 10s (was 30s)
- Decrease failover timeout to 30s (was 60s)
- Decrease cold startup (after (re)start) with no OA updates to 30s (was 60s)
- Immediately connect on Resume()
- Fix query priority which got broken with #6970
- Add more logging when a failover is in progress

```
[2019-03-29 16:13:53 +0100] information/IdoMysqlConnection: Last update by endpoint 'master1' was 8.33246s ago (< failover timeout of 30s). Retrying.

[2019-03-29 16:14:23 +0100] information/IdoMysqlConnection: Last update by endpoint 'master1' was 38.3288s ago. Taking over 'ido-mysql' in HA zone 'master'.
```

- Add more logging for reconnect and disconnect handling
- Add 'last_failover' attribute to IDO*Connection objects

refs #6970

doc/09-object-types.md
lib/base/workqueue.hpp
lib/db_ido/dbconnection.cpp
lib/db_ido/dbconnection.ti
lib/db_ido_mysql/idomysqlconnection.cpp
lib/db_ido_pgsql/idopgsqlconnection.cpp
lib/remote/apilistener.cpp
lib/remote/authority.cpp

index 0e71df3311432725d32975fb156eb781919fd525..f3f69976f9b5bebad7b3f8a84ca5e747d3c8b542 100644 (file)
@@ -875,7 +875,7 @@ Configuration Attributes:
   instance\_name            | String                | **Optional.** Unique identifier for the local Icinga 2 instance. Defaults to `default`.
   instance\_description     | String                | **Optional.** Description for the Icinga 2 instance.
   enable\_ha                | Boolean               | **Optional.** Enable the high availability functionality. Only valid in a [cluster setup](06-distributed-monitoring.md#distributed-monitoring-high-availability-db-ido). Defaults to `true`.
-  failover\_timeout         | Duration              | **Optional.** Set the failover timeout in a [HA cluster](06-distributed-monitoring.md#distributed-monitoring-high-availability-db-ido). Must not be lower than 60s. Defaults to `60s`.
+  failover\_timeout         | Duration              | **Optional.** Set the failover timeout in a [HA cluster](06-distributed-monitoring.md#distributed-monitoring-high-availability-db-ido). Must not be lower than 30s. Defaults to `30s`.
   cleanup                   | Dictionary            | **Optional.** Dictionary with items for historical table cleanup.
   categories                | Array                 | **Optional.** Array of information types that should be written to the database.
 
@@ -924,6 +924,12 @@ by Icinga Web 2 in the table above.
 In addition to the category flags listed above the `DbCatEverything`
 flag may be used as a shortcut for listing all flags.
 
+Runtime Attributes:
+
+  Name                        | Type                  | Description
+  ----------------------------|-----------------------|-----------------
+  last\_failover              | Timestamp             | When the last failover happened for this connection (only available with `enable_ha = true`.
+
 ## IdoPgsqlConnection <a id="objecttype-idopgsqlconnection"></a>
 
 IDO database adapter for PostgreSQL.
@@ -963,7 +969,7 @@ Configuration Attributes:
   instance\_name            | String                | **Optional.** Unique identifier for the local Icinga 2 instance. Defaults to `default`.
   instance\_description     | String                | **Optional.** Description for the Icinga 2 instance.
   enable\_ha                | Boolean               | **Optional.** Enable the high availability functionality. Only valid in a [cluster setup](06-distributed-monitoring.md#distributed-monitoring-high-availability-db-ido). Defaults to `true`.
-  failover\_timeout         | Duration              | **Optional.** Set the failover timeout in a [HA cluster](06-distributed-monitoring.md#distributed-monitoring-high-availability-db-ido). Must not be lower than 60s. Defaults to `60s`.
+  failover\_timeout         | Duration              | **Optional.** Set the failover timeout in a [HA cluster](06-distributed-monitoring.md#distributed-monitoring-high-availability-db-ido). Must not be lower than 30s. Defaults to `30s`.
   cleanup                   | Dictionary            | **Optional.** Dictionary with items for historical table cleanup.
   categories                | Array                 | **Optional.** Array of information types that should be written to the database.
 
@@ -1012,6 +1018,12 @@ by Icinga Web 2 in the table above.
 In addition to the category flags listed above the `DbCatEverything`
 flag may be used as a shortcut for listing all flags.
 
+Runtime Attributes:
+
+  Name                        | Type                  | Description
+  ----------------------------|-----------------------|-----------------
+  last\_failover              | Timestamp             | When the last failover happened for this connection (only available with `enable_ha = true`.
+
 ## InfluxdbWriter <a id="objecttype-influxdbwriter"></a>
 
 Writes check result metrics and performance data to a defined InfluxDB host.
index ca9afb352b208f599eed6ddd170c455c27567bde..824d2e8243f42f1ab99b8c8fc513c49d56f79b9a 100644 (file)
@@ -19,9 +19,10 @@ namespace icinga
 
 enum WorkQueuePriority
 {
-       PriorityLow,
-       PriorityNormal,
-       PriorityHigh
+       PriorityLow = 0,
+       PriorityNormal = 1,
+       PriorityHigh = 2,
+       PriorityImmediate = 4
 };
 
 using TaskFunction = std::function<void ()>;
index 80c9fe3363fccd6fdb3e170069be065528fdc8ad..d86efa59638b7a399da0a58daa8268be84c5a73a 100644 (file)
@@ -435,8 +435,8 @@ void DbConnection::ValidateFailoverTimeout(const Lazy<double>& lvalue, const Val
 {
        ObjectImpl<DbConnection>::ValidateFailoverTimeout(lvalue, utils);
 
-       if (lvalue() < 60)
-               BOOST_THROW_EXCEPTION(ValidationError(this, { "failover_timeout" }, "Failover timeout minimum is 60s."));
+       if (lvalue() < 30)
+               BOOST_THROW_EXCEPTION(ValidationError(this, { "failover_timeout" }, "Failover timeout minimum is 30s."));
 }
 
 void DbConnection::ValidateCategories(const Lazy<Array::Ptr>& lvalue, const ValidationUtils& utils)
index 7a4b949cc6f369c8f5fa2d6c46ee511f45d54ff3..ad02b4094503bc0a89944fb727a933f96508cf76 100644 (file)
@@ -42,9 +42,11 @@ abstract class DbConnection : ConfigObject
        };
 
        [config] double failover_timeout {
-               default {{{ return 60; }}}
+               default {{{ return 30; }}}
        };
 
+       [state, no_user_modify] double last_failover;
+
        [no_user_modify] String schema_version;
        [no_user_modify] bool connected;
        [no_user_modify] bool should_connect {
index dcef1618bbf06ce7d7ab19f69fa6370ec3b2fb3d..d868ee01f76a85f9af9870b97538dd26f141945a 100644 (file)
@@ -65,8 +65,6 @@ void IdoMysqlConnection::StatsFunc(const Dictionary::Ptr& status, const Array::P
 
 void IdoMysqlConnection::Resume()
 {
-       DbConnection::Resume();
-
        Log(LogInformation, "IdoMysqlConnection")
                << "'" << GetName() << "' resumed.";
 
@@ -74,6 +72,9 @@ void IdoMysqlConnection::Resume()
 
        m_QueryQueue.SetExceptionCallback(std::bind(&IdoMysqlConnection::ExceptionHandler, this, _1));
 
+       /* Immediately try to connect on Resume() without timer. */
+       m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::Reconnect, this), PriorityImmediate);
+
        m_TxTimer = new Timer();
        m_TxTimer->SetInterval(1);
        m_TxTimer->OnTimerExpired.connect(std::bind(&IdoMysqlConnection::TxTimerHandler, this));
@@ -83,23 +84,30 @@ void IdoMysqlConnection::Resume()
        m_ReconnectTimer->SetInterval(10);
        m_ReconnectTimer->OnTimerExpired.connect(std::bind(&IdoMysqlConnection::ReconnectTimerHandler, this));
        m_ReconnectTimer->Start();
-       m_ReconnectTimer->Reschedule(0);
+
+       /* Start with queries after connect. */
+       DbConnection::Resume();
 
        ASSERT(m_Mysql->thread_safe());
 }
 
 void IdoMysqlConnection::Pause()
 {
-       m_ReconnectTimer.reset();
+       Log(LogDebug, "IdoMysqlConnection")
+               << "Attempting to pause '" << GetName() << "'.";
 
        DbConnection::Pause();
 
+       m_ReconnectTimer.reset();
+
 #ifdef I2_DEBUG /* I2_DEBUG */
        Log(LogDebug, "IdoMysqlConnection")
                << "Rescheduling disconnect task.";
 #endif /* I2_DEBUG */
 
        m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::Disconnect, this), PriorityLow);
+
+       /* Work on remaining tasks but never delete the threads, for HA resuming later. */
        m_QueryQueue.Join();
 
        Log(LogInformation, "IdoMysqlConnection")
@@ -137,6 +145,9 @@ void IdoMysqlConnection::Disconnect()
        m_Mysql->close(&m_Connection);
 
        SetConnected(false);
+
+       Log(LogInformation, "IdoMysqlConnection")
+               << "Disconnected from '" << GetName() << "' database '" << GetDatabase() << "'.";
 }
 
 void IdoMysqlConnection::TxTimerHandler()
@@ -154,8 +165,8 @@ void IdoMysqlConnection::NewTransaction()
                << "Scheduling new transaction and finishing async queries.";
 #endif /* I2_DEBUG */
 
-       m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::InternalNewTransaction, this), PriorityHigh);
-       m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::FinishAsyncQueries, this), PriorityHigh);
+       m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::InternalNewTransaction, this), PriorityNormal);
+       m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::FinishAsyncQueries, this), PriorityNormal);
 }
 
 void IdoMysqlConnection::InternalNewTransaction()
@@ -176,7 +187,8 @@ void IdoMysqlConnection::ReconnectTimerHandler()
                << "Scheduling reconnect task.";
 #endif /* I2_DEBUG */
 
-       m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::Reconnect, this), PriorityHigh);
+       /* Only allow Reconnect events with high priority. */
+       m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::Reconnect, this), PriorityImmediate);
 }
 
 void IdoMysqlConnection::Reconnect()
@@ -194,6 +206,7 @@ void IdoMysqlConnection::Reconnect()
 
        bool reconnect = false;
 
+       /* Ensure to close old connections first. */
        if (GetConnected()) {
                /* Check if we're really still connected */
                if (m_Mysql->ping(&m_Connection) == 0)
@@ -204,6 +217,9 @@ void IdoMysqlConnection::Reconnect()
                reconnect = true;
        }
 
+       Log(LogDebug, "IdoMysqlConnection")
+               << "Reconnect: Clearing ID cache.";
+
        ClearIDCache();
 
        String ihost, isocket_path, iuser, ipasswd, idb;
@@ -258,6 +274,9 @@ void IdoMysqlConnection::Reconnect()
                BOOST_THROW_EXCEPTION(std::runtime_error(m_Mysql->error(&m_Connection)));
        }
 
+       Log(LogNotice, "IdoMysqlConnection")
+               << "Reconnect: '" << GetName() << "' is now connected to database '" << GetDatabase() << "'.";
+
        SetConnected(true);
 
        IdoMysqlResult result = Query("SELECT @@global.max_allowed_packet AS max_allowed_packet");
@@ -343,12 +362,16 @@ void IdoMysqlConnection::Reconnect()
                        else
                                status_update_time = 0;
 
-                       double status_update_age = Utility::GetTime() - status_update_time;
+                       double now = Utility::GetTime();
+
+                       double status_update_age = now - status_update_time;
+                       double failoverTimeout = GetFailoverTimeout();
 
-                       Log(LogNotice, "IdoMysqlConnection")
-                               << "Last update by '" << endpoint_name << "' was " << status_update_age << "s ago.";
+                       if (status_update_age < failoverTimeout) {
+                               Log(LogInformation, "IdoMysqlConnection")
+                                       << "Last update by endpoint '" << endpoint_name << "' was "
+                                       << status_update_age << "s ago (< failover timeout of " << failoverTimeout << "s). Retrying.";
 
-                       if (status_update_age < GetFailoverTimeout()) {
                                m_Mysql->close(&m_Connection);
                                SetConnected(false);
                                SetShouldConnect(false);
@@ -366,9 +389,15 @@ void IdoMysqlConnection::Reconnect()
 
                                return;
                        }
+
+                       SetLastFailover(now);
+
+                       Log(LogInformation, "IdoMysqlConnection")
+                               << "Last update by endpoint '" << endpoint_name << "' was "
+                               << status_update_age << "s ago. Taking over '" << GetName() << "' in HA zone '" << Zone::GetLocalZone()->GetName() << "'.";
                }
 
-               Log(LogNotice, "IdoMysqlConnection", "Enabling IDO connection.");
+               Log(LogNotice, "IdoMysqlConnection", "Enabling IDO connection in HA zone.");
        }
 
        Log(LogInformation, "IdoMysqlConnection")
@@ -435,9 +464,9 @@ void IdoMysqlConnection::Reconnect()
                << "Scheduling session table clear and finish connect task.";
 #endif /* I2_DEBUG */
 
-       m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::ClearTablesBySession, this), PriorityHigh);
+       m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::ClearTablesBySession, this), PriorityNormal);
 
-       m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::FinishConnect, this, startTime), PriorityHigh);
+       m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::FinishConnect, this, startTime), PriorityNormal);
 }
 
 void IdoMysqlConnection::FinishConnect(double startTime)
@@ -450,7 +479,8 @@ void IdoMysqlConnection::FinishConnect(double startTime)
        FinishAsyncQueries();
 
        Log(LogInformation, "IdoMysqlConnection")
-               << "Finished reconnecting to MySQL IDO database in " << std::setw(2) << Utility::GetTime() - startTime << " second(s).";
+               << "Finished reconnecting to '" << GetName() << "' database '" << GetDatabase() << "' in "
+               << std::setw(2) << Utility::GetTime() - startTime << " second(s).";
 
        Query("COMMIT");
        Query("BEGIN");
@@ -710,7 +740,7 @@ void IdoMysqlConnection::ActivateObject(const DbObject::Ptr& dbobj)
                << "Scheduling object activation task for '" << dbobj->GetName1() << "!" << dbobj->GetName2() << "'.";
 #endif /* I2_DEBUG */
 
-       m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::InternalActivateObject, this, dbobj), PriorityHigh);
+       m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::InternalActivateObject, this, dbobj), PriorityNormal);
 }
 
 void IdoMysqlConnection::InternalActivateObject(const DbObject::Ptr& dbobj)
@@ -755,7 +785,7 @@ void IdoMysqlConnection::DeactivateObject(const DbObject::Ptr& dbobj)
                << "Scheduling object deactivation task for '" << dbobj->GetName1() << "!" << dbobj->GetName2() << "'.";
 #endif /* I2_DEBUG */
 
-       m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::InternalDeactivateObject, this, dbobj), PriorityHigh);
+       m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::InternalDeactivateObject, this, dbobj), PriorityNormal);
 }
 
 void IdoMysqlConnection::InternalDeactivateObject(const DbObject::Ptr& dbobj)
index 25ce087986103ad5a1a563a1be33be742670c3ba..12bca3c359c2bd2fe96e9f5fb1a9bc3d0d72b495 100644 (file)
@@ -72,8 +72,6 @@ void IdoPgsqlConnection::StatsFunc(const Dictionary::Ptr& status, const Array::P
 
 void IdoPgsqlConnection::Resume()
 {
-       DbConnection::Resume();
-
        Log(LogInformation, "IdoPgsqlConnection")
                << "'" << GetName() << "' resumed.";
 
@@ -81,6 +79,9 @@ void IdoPgsqlConnection::Resume()
 
        m_QueryQueue.SetExceptionCallback(std::bind(&IdoPgsqlConnection::ExceptionHandler, this, _1));
 
+       /* Immediately try to connect on Resume() without timer. */
+       m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::Reconnect, this), PriorityImmediate);
+
        m_TxTimer = new Timer();
        m_TxTimer->SetInterval(1);
        m_TxTimer->OnTimerExpired.connect(std::bind(&IdoPgsqlConnection::TxTimerHandler, this));
@@ -90,23 +91,26 @@ void IdoPgsqlConnection::Resume()
        m_ReconnectTimer->SetInterval(10);
        m_ReconnectTimer->OnTimerExpired.connect(std::bind(&IdoPgsqlConnection::ReconnectTimerHandler, this));
        m_ReconnectTimer->Start();
-       m_ReconnectTimer->Reschedule(0);
+
+       /* Start with queries after connect. */
+       DbConnection::Resume();
 
        ASSERT(m_Pgsql->isthreadsafe());
 }
 
 void IdoPgsqlConnection::Pause()
 {
-       m_ReconnectTimer.reset();
-
        DbConnection::Pause();
 
+       m_ReconnectTimer.reset();
+
        m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::Disconnect, this), PriorityLow);
+
+       /* Work on remaining tasks but never delete the threads, for HA resuming later. */
        m_QueryQueue.Join();
 
        Log(LogInformation, "IdoPgsqlConnection")
                << "'" << GetName() << "' paused.";
-
 }
 
 void IdoPgsqlConnection::ExceptionHandler(boost::exception_ptr exp)
@@ -138,6 +142,9 @@ void IdoPgsqlConnection::Disconnect()
 
        m_Pgsql->finish(m_Connection);
        SetConnected(false);
+
+       Log(LogInformation, "IdoPgsqlConnection")
+               << "Disconnected from '" << GetName() << "' database '" << GetDatabase() << "'.";
 }
 
 void IdoPgsqlConnection::TxTimerHandler()
@@ -150,7 +157,7 @@ void IdoPgsqlConnection::NewTransaction()
        if (IsPaused())
                return;
 
-       m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::InternalNewTransaction, this), PriorityHigh, true);
+       m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::InternalNewTransaction, this), PriorityNormal, true);
 }
 
 void IdoPgsqlConnection::InternalNewTransaction()
@@ -166,6 +173,7 @@ void IdoPgsqlConnection::InternalNewTransaction()
 
 void IdoPgsqlConnection::ReconnectTimerHandler()
 {
+       /* Only allow Reconnect events with high priority. */
        m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::Reconnect, this), PriorityHigh);
 }
 
@@ -323,12 +331,16 @@ void IdoPgsqlConnection::Reconnect()
                        else
                                status_update_time = 0;
 
-                       double status_update_age = Utility::GetTime() - status_update_time;
+                       double now = Utility::GetTime();
 
-                       Log(LogNotice, "IdoPgsqlConnection")
-                               << "Last update by '" << endpoint_name << "' was " << status_update_age << "s ago.";
+                       double status_update_age = now - status_update_time;
+                       double failoverTimeout = GetFailoverTimeout();
 
                        if (status_update_age < GetFailoverTimeout()) {
+                               Log(LogInformation, "IdoPgsqlConnection")
+                                       << "Last update by endpoint '" << endpoint_name << "' was "
+                                       << status_update_age << "s ago (< failover timeout of " << failoverTimeout << "s). Retrying.";
+
                                m_Pgsql->finish(m_Connection);
                                SetConnected(false);
                                SetShouldConnect(false);
@@ -346,6 +358,12 @@ void IdoPgsqlConnection::Reconnect()
 
                                return;
                        }
+
+                       SetLastFailover(now);
+
+                       Log(LogInformation, "IdoPgsqlConnection")
+                               << "Last update by endpoint '" << endpoint_name << "' was "
+                               << status_update_age << "s ago. Taking over '" << GetName() << "' in HA zone '" << Zone::GetLocalZone()->GetName() << "'.";
                }
 
                Log(LogNotice, "IdoPgsqlConnection", "Enabling IDO connection.");
@@ -409,9 +427,9 @@ void IdoPgsqlConnection::Reconnect()
 
        UpdateAllObjects();
 
-       m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::ClearTablesBySession, this), PriorityHigh);
+       m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::ClearTablesBySession, this), PriorityNormal);
 
-       m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::FinishConnect, this, startTime), PriorityHigh);
+       m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::FinishConnect, this, startTime), PriorityNormal);
 }
 
 void IdoPgsqlConnection::FinishConnect(double startTime)
@@ -422,7 +440,8 @@ void IdoPgsqlConnection::FinishConnect(double startTime)
                return;
 
        Log(LogInformation, "IdoPgsqlConnection")
-               << "Finished reconnecting to PostgreSQL IDO database in " << std::setw(2) << Utility::GetTime() - startTime << " second(s).";
+               << "Finished reconnecting to '" << GetName() << "' database '" << GetDatabase() << "' in "
+               << std::setw(2) << Utility::GetTime() - startTime << " second(s).";
 
        Query("COMMIT");
        Query("BEGIN");
@@ -559,7 +578,7 @@ void IdoPgsqlConnection::ActivateObject(const DbObject::Ptr& dbobj)
        if (IsPaused())
                return;
 
-       m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::InternalActivateObject, this, dbobj), PriorityHigh);
+       m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::InternalActivateObject, this, dbobj), PriorityNormal);
 }
 
 void IdoPgsqlConnection::InternalActivateObject(const DbObject::Ptr& dbobj)
@@ -596,7 +615,7 @@ void IdoPgsqlConnection::DeactivateObject(const DbObject::Ptr& dbobj)
        if (IsPaused())
                return;
 
-       m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::InternalDeactivateObject, this, dbobj), PriorityHigh);
+       m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::InternalDeactivateObject, this, dbobj), PriorityNormal);
 }
 
 void IdoPgsqlConnection::InternalDeactivateObject(const DbObject::Ptr& dbobj)
index 63c1e509c53dc555ad36d7d488f906abfe1fb4bc..a26846d38bfc0ef0689e051fe915428439e55921 100644 (file)
@@ -252,9 +252,13 @@ void ApiListener::Start(bool runtimeCreated)
        m_ReconnectTimer->Start();
        m_ReconnectTimer->Reschedule(0);
 
+       /* Keep this in relative sync with the cold startup in UpdateObjectAuthority() and the reconnect interval above.
+        * Previous: 60s reconnect, 30s OA, 60s cold startup.
+        * Now: 10s reconnect, 10s OA, 30s cold startup. 
+        */
        m_AuthorityTimer = new Timer();
        m_AuthorityTimer->OnTimerExpired.connect(std::bind(&ApiListener::UpdateObjectAuthority));
-       m_AuthorityTimer->SetInterval(30);
+       m_AuthorityTimer->SetInterval(10);
        m_AuthorityTimer->Start();
 
        m_CleanupCertificateRequestsTimer = new Timer();
index c532b97ade2f335b8207cd464717f40f256b39e0..23f571119afa49b9535d15bc95070339df6d016f 100644 (file)
@@ -4,11 +4,20 @@
 #include "remote/apilistener.hpp"
 #include "base/configtype.hpp"
 #include "base/utility.hpp"
+#include "base/convert.hpp"
 
 using namespace icinga;
 
 void ApiListener::UpdateObjectAuthority()
 {
+       ApiListener::Ptr instance = ApiListener::GetInstance();
+
+       if (!instance)
+               return;
+
+       Log(LogNotice, "ApiListener")
+               << "Updating object authority for objects at endpoint '" << instance->GetIdentity() << "'.";
+
        Zone::Ptr my_zone = Zone::GetLocalZone();
 
        std::vector<Endpoint::Ptr> endpoints;
@@ -30,7 +39,8 @@ void ApiListener::UpdateObjectAuthority()
 
                double mainTime = Application::GetMainTime();
 
-               if (num_total > 1 && endpoints.size() <= 1 && (mainTime == 0 || Utility::GetTime() - mainTime < 60))
+               /* 30 seconds cold startup, don't update any authority to give the secondary endpoint time to reconnect. */
+               if (num_total > 1 && endpoints.size() <= 1 && (mainTime == 0 || Utility::GetTime() - mainTime < 30))
                        return;
 
                std::sort(endpoints.begin(), endpoints.end(),
@@ -57,6 +67,12 @@ void ApiListener::UpdateObjectAuthority()
                        else
                                authority = endpoints[Utility::SDBM(object->GetName()) % endpoints.size()] == my_endpoint;
 
+#ifdef I2_DEBUG
+//                     //Enable on demand, causes heavy logging on each run.
+//                     Log(LogDebug, "ApiListener")
+//                             << "Setting authority '" << Convert::ToString(authority) << "' for object '" << object->GetName() << "' of type '" << object->GetReflectionType()->GetName() << "'.";
+#endif /* I2_DEBUG */
+
                        object->SetAuthority(authority);
                }
        }