]> granicus.if.org Git - ejabberd/commitdiff
* src/ejabberd_s2s_out.erl: Fix long timeout when reconnecting s2s
authorBadlop <badlop@process-one.net>
Fri, 18 Apr 2008 12:19:45 +0000 (12:19 +0000)
committerBadlop <badlop@process-one.net>
Fri, 18 Apr 2008 12:19:45 +0000 (12:19 +0000)
after a remote server crash (EJAB-540)
* src/ejabberd_s2s_in.erl: Likewise
* src/ejabberd_s2s.erl: Likewise
* doc/guide.tex: Likewise

SVN Revision: 1296

ChangeLog
doc/guide.tex
src/ejabberd_s2s.erl
src/ejabberd_s2s_in.erl
src/ejabberd_s2s_out.erl

index 3be0790958e35587236f5b6d51914cbbee09242a..0063a4d1bcb0e31acb9ccc881d860a0618f8d297 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2008-04-18  Badlop  <badlop@process-one.net>
+
+       * src/ejabberd_s2s_out.erl: Fix long timeout when reconnecting s2s
+       after a remote server crash (EJAB-540)
+       * src/ejabberd_s2s_in.erl: Likewise
+       * src/ejabberd_s2s.erl: Likewise
+       * doc/guide.tex: Likewise
+
 2008-04-16  Badlop  <badlop@process-one.net>
 
        * doc/guide.tex: Clarification: PEP is enabled in default config
index 634bfdcc3a0583d26d29a4239240a88786dc7dcc..94dca9f21cda5c231c77c217a20c22b15cbcd169 100644 (file)
@@ -786,6 +786,9 @@ There are some additional global options:
   Defines if incoming and outgoing s2s connections with a specific remote host are allowed or denied.
   This allows restricting ejabberd to only establish s2s connections 
   with a small list of trusted servers, or to block some specific servers.
+  \titem{\{s2s\_max\_retry\_delay, Seconds\}} \ind{options!s2s\_max\_retry\_delay}
+  The maximum allowed delay for retry to connect after a failed connection attempt.
+  Specified in seconds. The default value is 300 seconds (5 minutes).
 \end{description}
 
 For example, the following simple configuration defines:
index c72af5aecae411d157a61404521d24d789cb4be0..b500388c5f1642338baf18a0770b1893f0ed9ca0 100644 (file)
@@ -34,6 +34,7 @@
         route/3,
         have_connection/1,
         has_key/2,
+        get_connections_pids/1,
         try_register/1,
         remove_connection/3,
         dirty_get_connections/0,
@@ -108,6 +109,14 @@ has_key(FromTo, Key) ->
            true
     end.
 
+get_connections_pids(FromTo) ->
+    case catch mnesia:dirty_read(s2s, FromTo) of
+       L when is_list(L) ->
+           [Connection#s2s.pid || Connection <- L];
+       _ ->
+           []
+    end.
+
 try_register(FromTo) ->
     Key = randoms:get_string(),
     MaxS2SConnectionsNumber = max_s2s_connections_number(FromTo),
index 623339845f2c8e488ce6bc43040df7adf7a6713c..cd57e3d1ecda6177d5326fe31c1e7b128ce0f64d 100644 (file)
@@ -352,6 +352,7 @@ stream_established({xmlstreamelement, El}, StateData) ->
             case {ejabberd_s2s:allow_host(To, From),
                   lists:member(LTo, ejabberd_router:dirty_get_all_domains())} of
                 {true, true} ->
+                   ejabberd_s2s_out:terminate_if_waiting_delay(To, From),
                    ejabberd_s2s_out:start(To, From,
                                           {verify, self(),
                                            Key, StateData#state.streamid}),
index 8eddc41ebf3b97dbfabde076e7013e6bdcd42a56..e5f8ed254d82dd3c02cf2e8f8f442c03ccf8c2ab 100644 (file)
@@ -33,6 +33,7 @@
 -export([start/3,
         start_link/3,
         start_connection/1,
+        terminate_if_waiting_delay/2,
         stop_connection/1]).
 
 %% p1_fsm callbacks (same as gen_fsm)
@@ -67,6 +68,7 @@
                db_enabled = true,
                try_auth = true,
                myname, server, queue,
+               delay_to_retry = undefined_delay,
                new = false, verify = false,
                timer}).
 
 %% -define(FSMLIMITS, [{max_queue, 2000}]).
 -define(FSMTIMEOUT, 5000).
 
+%% Maximum delay to wait before retrying to connect after a failed attempt.
+%% Specified in milliseconds. Default value is 5 minutes.
+-define(MAX_RETRY_DELAY, 300000).
+
 -define(STREAM_HEADER,
        "<?xml version='1.0'?>"
        "<stream:stream "
@@ -217,7 +223,7 @@ open_socket(init, StateData) ->
        {error, _Reason} ->
            ?INFO_MSG("s2s connection: ~s -> ~s (remote server not found)",
                      [StateData#state.myname, StateData#state.server]),
-           wait_before_reconnect(StateData, 300000)
+           wait_before_reconnect(StateData)
            %%{stop, normal, StateData}
     end;
 open_socket(stop, StateData) ->
@@ -768,6 +774,12 @@ handle_info({timeout, Timer, _}, _StateName,
     ?INFO_MSG("Closing connection with ~s: timeout", [StateData#state.server]),
     {stop, normal, StateData};
 
+handle_info(terminate_if_waiting_before_retry, wait_before_retry, StateData) ->
+    {stop, normal, StateData};
+
+handle_info(terminate_if_waiting_before_retry, StateName, StateData) ->
+    {next_state, StateName, StateData, get_timeout_interval(StateName)};
+
 handle_info(_, StateName, StateData) ->
     {next_state, StateName, StateData, get_timeout_interval(StateName)}.
 
@@ -989,7 +1001,7 @@ log_s2s_out(false, _, _) -> ok;
 log_s2s_out(_, Myname, Server) ->
     ?INFO_MSG("Trying to open s2s connection: ~s -> ~s",[Myname, Server]).
 
-%% Calcultate timeout depending on which state we are in:
+%% Calculate timeout depending on which state we are in:
 %% Can return integer > 0 | infinity
 get_timeout_interval(StateName) ->
     case StateName of
@@ -1005,11 +1017,45 @@ get_timeout_interval(StateName) ->
 
 %% This function is intended to be called at the end of a state
 %% function that want to wait for a reconnect delay before stopping.
-wait_before_reconnect(StateData, Delay) ->
+wait_before_reconnect(StateData) ->
     %% bounce queue manage by process and Erlang message queue
     bounce_queue(StateData#state.queue, ?ERR_REMOTE_SERVER_NOT_FOUND),
     bounce_messages(?ERR_REMOTE_SERVER_NOT_FOUND),
     cancel_timer(StateData#state.timer),
+    Delay = case StateData#state.delay_to_retry of
+               undefined_delay ->
+                   %% The initial delay is random, between 1 and 15 seconds:
+                   %% a pseudo-random integer from 1000 to 14999 milliseconds
+                   {_, _, MicroSecs} = now(),
+                   (MicroSecs rem 14000) + 1000;
+               D1 ->
+                   %% Double the delay with each successive failed
+                   %% reconnection attempt, but don't exceed the max
+                   lists:min([D1 * 2, get_max_retry_delay()])
+           end,
     Timer = erlang:start_timer(Delay, self(), []),
     {next_state, wait_before_retry, StateData#state{timer=Timer,
+                                                   delay_to_retry = Delay,
                                                    queue = queue:new()}}.
+
+%% @doc Get the maximum allowed delay for retry to reconnect (in milliseconds).
+%% The default value is 5 minutes.
+%% The option {s2s_max_retry_delay, Seconds} can be used (in seconds).
+%% @spec () -> integer()
+get_max_retry_delay() ->
+    case ejabberd_config:get_local_option(s2s_max_retry_delay) of
+       Seconds when is_integer(Seconds) ->
+           Seconds*1000;
+       _ ->
+           ?MAX_RETRY_DELAY
+    end.
+
+%% Terminate s2s_out connections that are in state wait_before_retry
+terminate_if_waiting_delay(From, To) ->
+    FromTo = {From, To},
+    Pids = ejabberd_s2s:get_connections_pids(FromTo),
+    lists:foreach(
+      fun(Pid) ->
+             Pid ! terminate_if_waiting_before_retry
+      end,
+      Pids).