]> granicus.if.org Git - postgresql/commitdiff
Fix minor bug in logical-replication walsender shutdown
author Alvaro Herrera <alvherre@alvh.no-ip.org>
Thu, 17 Oct 2019 13:06:06 +0000 (15:06 +0200)
committer Alvaro Herrera <alvherre@alvh.no-ip.org>
Thu, 17 Oct 2019 13:06:06 +0000 (15:06 +0200)
Logical walsender should exit when it catches up with sending WAL during
shutdown; but there was a rare corner case in which it failed to do so,
because a race condition put it back to waiting for more WAL instead --
and since there wasn't any, it would not shut down immediately.  It would
only continue the shutdown once wal_sender_timeout terminated the sleep,
which caused annoying waits during the shutdown procedure.  Restructure the
code so that we no longer forget to set WalSndCaughtUp in that case.

This was an oversight in commit c6c333436.

Backpatch all the way down to 9.4.

Author: Craig Ringer, Álvaro Herrera
Discussion: https://postgr.es/m/CAMsr+YEuz4XwZX_QmnX_-2530XhyAmnK=zCmicEnq1vLr0aZ-g@mail.gmail.com

src/backend/replication/walsender.c

index 92fa86fc9d51a556a12089dfd7ab6654b26c12aa..4523d4c81fcfc0a571cd3d72fd524609e3828429 100644 (file)
@@ -1296,7 +1296,6 @@ WalSndWaitForWal(XLogRecPtr loc)
        int                     wakeEvents;
        static XLogRecPtr RecentFlushPtr = InvalidXLogRecPtr;
 
-
        /*
         * Fast path to avoid acquiring the spinlock in case we already know we
         * have enough WAL available. This is particularly interesting if we're
@@ -2814,6 +2813,7 @@ XLogSendLogical(void)
 {
        XLogRecord *record;
        char       *errm;
+       XLogRecPtr      flushPtr;
 
        /*
         * Don't know whether we've caught up yet. We'll set WalSndCaughtUp to
@@ -2830,11 +2830,13 @@ XLogSendLogical(void)
        if (errm != NULL)
                elog(ERROR, "%s", errm);
 
+       /*
+        * We'll use the current flush point to determine whether we've caught up.
+        */
+       flushPtr = GetFlushRecPtr();
+
        if (record != NULL)
        {
-               /* XXX: Note that logical decoding cannot be used while in recovery */
-               XLogRecPtr      flushPtr = GetFlushRecPtr();
-
                /*
                 * Note the lack of any call to LagTrackerWrite() which is handled by
                 * WalSndUpdateProgress which is called by output plugin through
@@ -2843,32 +2845,19 @@ XLogSendLogical(void)
                LogicalDecodingProcessRecord(logical_decoding_ctx, logical_decoding_ctx->reader);
 
                sentPtr = logical_decoding_ctx->reader->EndRecPtr;
-
-               /*
-                * If we have sent a record that is at or beyond the flushed point, we
-                * have caught up.
-                */
-               if (sentPtr >= flushPtr)
-                       WalSndCaughtUp = true;
        }
-       else
-       {
-               /*
-                * If the record we just wanted read is at or beyond the flushed
-                * point, then we're caught up.
-                */
-               if (logical_decoding_ctx->reader->EndRecPtr >= GetFlushRecPtr())
-               {
-                       WalSndCaughtUp = true;
 
-                       /*
-                        * Have WalSndLoop() terminate the connection in an orderly
-                        * manner, after writing out all the pending data.
-                        */
-                       if (got_STOPPING)
-                               got_SIGUSR2 = true;
-               }
-       }
+       /* Set flag if we're caught up. */
+       if (logical_decoding_ctx->reader->EndRecPtr >= flushPtr)
+               WalSndCaughtUp = true;
+
+       /*
+        * If we're caught up and have been requested to stop, have WalSndLoop()
+        * terminate the connection in an orderly manner, after writing out all
+        * the pending data.
+        */
+       if (WalSndCaughtUp && got_STOPPING)
+               got_SIGUSR2 = true;
 
        /* Update shared memory status */
        {