From 1201dc30b1245c193584c3eb2f7afee9d136abd6 Mon Sep 17 00:00:00 2001
From: Jim Jagielski <jim@apache.org>
Date: Wed, 23 Sep 2015 12:35:57 +0000
Subject: [PATCH] Merge r1664709, r1697323 from trunk:

 * Do not reset the retry timeout if the worker is in error at this stage even
   if the connection to the backend was successful. It was likely set into
   error by a different thread / process in parallel e.g. for a timeout or
   bad status. We should respect this and should not continue with a connection
   via this worker even if we got one.


* Do a more complete cleanup here. At this point we cannot end up with something useful with the data we created so far.
Submitted by: rpluem
Reviewed/backported by: jim


git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/branches/2.4.x@1704835 13f79535-47bb-0310-9956-ffa450edef68
---
 CHANGES                    |  3 ++
 STATUS                     | 22 +-------------
 modules/proxy/proxy_util.c | 60 +++++++++++++++++++++++---------------
 3 files changed, 41 insertions(+), 44 deletions(-)

diff --git a/CHANGES b/CHANGES
index 89fade278e..62b9950c3d 100644
--- a/CHANGES
+++ b/CHANGES
@@ -2,6 +2,9 @@
 
 Changes with Apache 2.4.17
 
+  *) mod_proxy: Fix a race condition that caused a failed worker to be retried
+     before the retry period is over. [Ruediger Pluem]
+
   *) mod_autoindex: Allow autoindexes when neither mod_dir nor mod_mime are
      loaded. [Eric Covener]
 
diff --git a/STATUS b/STATUS
index ec4986b22a..ad3d2c91e7 100644
--- a/STATUS
+++ b/STATUS
@@ -108,28 +108,8 @@ RELEASE SHOWSTOPPERS:
 
 PATCHES ACCEPTED TO BACKPORT FROM TRUNK:
   [ start all new proposals below, under PATCHES PROPOSED. ]
-
-  *) mod_proxy: Fix a race condition that caused a failed worker to be retried
-     before the retry period is over
-      Trunk version of patch:
-         http://svn.apache.org/r1664709
-         http://svn.apache.org/r1697323
-      Backport version for 2.4.x of patch:
-         Trunk version of patch works modulo CHANGES
-      +1: rpluem, ylavic, jim
-      niq: 1. the if(worker->s->retries) {} and comment at line 2917
-              don't seem to make any sense.
-      rpluem: This is just taken over from existing code. It is just indented
-              differently hence part of the path I think it should be marked
-              as TODO section. But this should be subject to another
-              patch.
-           2. Re: error handline line 2930 - can PROXY_WORKER_IS_USABLE
-              not be tested BEFORE opening connection?
-      rpluem: We could, but we can catch more race cases with the current code
-              as it also catches the case where a connection establishment
-              took long and the worker went into error meanwhile.
  
-
+ 
 
 PATCHES PROPOSED TO BACKPORT FROM TRUNK:
   [ New proposals should be added at the end of the list ]
diff --git a/modules/proxy/proxy_util.c b/modules/proxy/proxy_util.c
index b35278a746..4163e9e54c 100644
--- a/modules/proxy/proxy_util.c
+++ b/modules/proxy/proxy_util.c
@@ -2826,33 +2826,47 @@ PROXY_DECLARE(int) ap_proxy_connect_backend(const char *proxy_function,
 
         connected    = 1;
     }
-    /*
-     * Put the entire worker to error state if
-     * the PROXY_WORKER_IGNORE_ERRORS flag is not set.
-     * Altrough some connections may be alive
-     * no further connections to the worker could be made
-     */
-    if (!connected && PROXY_WORKER_IS_USABLE(worker) &&
-        !(worker->s->status & PROXY_WORKER_IGNORE_ERRORS)) {
-        worker->s->error_time = apr_time_now();
-        worker->s->status |= PROXY_WORKER_IN_ERROR;
-        ap_log_error(APLOG_MARK, APLOG_ERR, 0, s, APLOGNO(00959)
-            "ap_proxy_connect_backend disabling worker for (%s) for %"
-            APR_TIME_T_FMT "s",
-            worker->s->hostname, apr_time_sec(worker->s->retry));
+    if (PROXY_WORKER_IS_USABLE(worker)) {
+        /*
+         * Put the entire worker to error state if
+         * the PROXY_WORKER_IGNORE_ERRORS flag is not set.
+         * Although some connections may be alive
+         * no further connections to the worker could be made
+         */
+        if (!connected) {
+            if (!(worker->s->status & PROXY_WORKER_IGNORE_ERRORS)) {
+                worker->s->error_time = apr_time_now();
+                worker->s->status |= PROXY_WORKER_IN_ERROR;
+                ap_log_error(APLOG_MARK, APLOG_ERR, 0, s, APLOGNO(00959)
+                    "ap_proxy_connect_backend disabling worker for (%s) for %"
+                    APR_TIME_T_FMT "s",
+                    worker->s->hostname, apr_time_sec(worker->s->retry));
+            }
+        }
+        else {
+            if (worker->s->retries) {
+                /*
+                 * A worker came back. So here is where we need to
+                 * either reset all params to initial conditions or
+                 * apply some sort of aging
+                 */
+            }
+            worker->s->error_time = 0;
+            worker->s->retries = 0;
+        }
+        return connected ? OK : DECLINED;
     }
     else {
-        if (worker->s->retries) {
-            /*
-             * A worker came back. So here is where we need to
-             * either reset all params to initial conditions or
-             * apply some sort of aging
-             */
+        /*
+         * The worker is in error likely done by a different thread / process
+         * e.g. for a timeout or bad status. We should respect this and should
+         * not continue with a connection via this worker even if we got one.
+         */
+        if (connected) {
+            socket_cleanup(conn);
         }
-        worker->s->error_time = 0;
-        worker->s->retries = 0;
+        return DECLINED;
     }
-    return connected ? OK : DECLINED;
 }
 
 static apr_status_t connection_shutdown(void *theconn)
-- 
2.40.0