]> granicus.if.org Git - postgresql/commitdiff
Back-patch assorted latch-related fixes.
authorTom Lane <tgl@sss.pgh.pa.us>
Wed, 10 Aug 2011 16:20:45 +0000 (12:20 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Wed, 10 Aug 2011 16:20:45 +0000 (12:20 -0400)
Fix a whole bunch of signal handlers that had been hacked to do things that
might change errno, without adding the necessary save/restore logic for
errno.  Also make some minor fixes in unix_latch.c, and clean up bizarre
and unsafe scheme for disowning the process's latch.  While at it, rename
the PGPROC latch field to procLatch for consistency with 9.2.

Issues noted while reviewing a patch by Peter Geoghegan.

src/backend/access/transam/xlog.c
src/backend/port/unix_latch.c
src/backend/replication/syncrep.c
src/backend/replication/walreceiver.c
src/backend/replication/walsender.c
src/backend/storage/lmgr/proc.c
src/backend/tcop/postgres.c
src/include/replication/syncrep.h
src/include/storage/proc.h

index fc3bed8d3f8d868b90525138552276c1eddd056b..9c8ef0260580bf524bb1021b8326ef5aeaff30fc 100644 (file)
@@ -9881,34 +9881,50 @@ startupproc_quickdie(SIGNAL_ARGS)
 static void
 StartupProcSigUsr1Handler(SIGNAL_ARGS)
 {
+       int                     save_errno = errno;
+
        latch_sigusr1_handler();
+
+       errno = save_errno;
 }
 
 /* SIGUSR2: set flag to finish recovery */
 static void
 StartupProcTriggerHandler(SIGNAL_ARGS)
 {
+       int                     save_errno = errno;
+
        promote_triggered = true;
        WakeupRecovery();
+
+       errno = save_errno;
 }
 
 /* SIGHUP: set flag to re-read config file at next convenient time */
 static void
 StartupProcSigHupHandler(SIGNAL_ARGS)
 {
+       int                     save_errno = errno;
+
        got_SIGHUP = true;
        WakeupRecovery();
+
+       errno = save_errno;
 }
 
 /* SIGTERM: set flag to abort redo and exit */
 static void
 StartupProcShutdownHandler(SIGNAL_ARGS)
 {
+       int                     save_errno = errno;
+
        if (in_restore_command)
                proc_exit(1);
        else
                shutdown_requested = true;
        WakeupRecovery();
+
+       errno = save_errno;
 }
 
 /* Handle SIGHUP and SIGTERM signals of startup process */
index 047e9def61b083478a27df9848693641715f0d80..19bb7def1f76951bfd456cbfec64dc59bd218ad5 100644 (file)
@@ -135,9 +135,12 @@ DisownLatch(volatile Latch *latch)
  * If the latch is already set, the function returns immediately.
  *
  * The 'timeout' is given in milliseconds, and -1 means wait forever.
- * On some platforms, signals cause the timeout to be restarted, so beware
- * that the function can sleep for several times longer than the specified
- * timeout.
+ * On some platforms, signals do not interrupt the wait, or even
+ * cause the timeout to be restarted, so beware that the function can sleep
+ * for several times longer than the requested timeout.  However, this
+ * difficulty is not so great as it seems, because the signal handlers for any
+ * signals that the caller should respond to ought to be programmed to end the
+ * wait by calling SetLatch.  Ideally, the timeout parameter is vestigial.
  *
  * The latch must be owned by the current process, ie. it must be a
  * backend-local latch initialized with InitLatch, or a shared latch
@@ -228,6 +231,7 @@ WaitLatchOrSocket(volatile Latch *latch, pgsocket sock, bool forRead,
                {
                        if (errno == EINTR)
                                continue;
+                       waiting = false;
                        ereport(ERROR,
                                        (errcode_for_socket_access(),
                                         errmsg("select() failed: %m")));
@@ -255,6 +259,10 @@ WaitLatchOrSocket(volatile Latch *latch, pgsocket sock, bool forRead,
  * Sets a latch and wakes up anyone waiting on it.
  *
  * This is cheap if the latch is already set, otherwise not so much.
+ *
+ * NB: when calling this in a signal handler, be sure to save and restore
+ * errno around it.  (That's standard practice in most signal handlers, of
+ * course, but we used to omit it in handlers that only set a flag.)
  */
 void
 SetLatch(volatile Latch *latch)
@@ -300,7 +308,10 @@ SetLatch(volatile Latch *latch)
        if (owner_pid == 0)
                return;
        else if (owner_pid == MyProcPid)
-               sendSelfPipeByte();
+       {
+               if (waiting)
+                       sendSelfPipeByte();
+       }
        else
                kill(owner_pid, SIGUSR1);
 }
@@ -332,7 +343,11 @@ ResetLatch(volatile Latch *latch)
  * SetLatch uses SIGUSR1 to wake up the process waiting on the latch.
  *
  * Wake up WaitLatch, if we're waiting.  (We might not be, since SIGUSR1 is
- * overloaded for multiple purposes.)
+ * overloaded for multiple purposes; or we might not have reached WaitLatch
+ * yet, in which case we don't need to fill the pipe either.)
+ *
+ * NB: when calling this in a signal handler, be sure to save and restore
+ * errno around it.
  */
 void
 latch_sigusr1_handler(void)
@@ -396,13 +411,19 @@ retry:
        }
 }
 
-/* Read all available data from the self-pipe */
+/*
+ * Read all available data from the self-pipe
+ *
+ * Note: this is only called when waiting = true.  If it fails and doesn't
+ * return, it must reset that flag first (though ideally, this will never
+ * happen).
+ */
 static void
 drainSelfPipe(void)
 {
        /*
         * There shouldn't normally be more than one byte in the pipe, or maybe a
-        * few more if multiple processes run SetLatch at the same instant.
+        * few bytes if multiple processes run SetLatch at the same instant.
         */
        char            buf[16];
        int                     rc;
@@ -417,9 +438,21 @@ drainSelfPipe(void)
                        else if (errno == EINTR)
                                continue;               /* retry */
                        else
+                       {
+                               waiting = false;
                                elog(ERROR, "read() on self-pipe failed: %m");
+                       }
                }
                else if (rc == 0)
+               {
+                       waiting = false;
                        elog(ERROR, "unexpected EOF on self-pipe");
+               }
+               else if (rc < sizeof(buf))
+               {
+                       /* we successfully drained the pipe; no need to read() again */
+                       break;
+               }
+               /* else buffer wasn't big enough, so read again */
        }
 }
index 8713b9700dcf70f5569f8744f23f05ea1aab750e..6463420cd85f420897d1292f9d98d28f28f67764 100644 (file)
@@ -111,9 +111,6 @@ SyncRepWaitForLSN(XLogRecPtr XactCommitLSN)
        Assert(SHMQueueIsDetached(&(MyProc->syncRepLinks)));
        Assert(WalSndCtl != NULL);
 
-       /* Reset the latch before adding ourselves to the queue. */
-       ResetLatch(&MyProc->waitLatch);
-
        LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);
        Assert(MyProc->syncRepState == SYNC_REP_NOT_WAITING);
 
@@ -167,7 +164,7 @@ SyncRepWaitForLSN(XLogRecPtr XactCommitLSN)
                int                     syncRepState;
 
                /* Must reset the latch before testing state. */
-               ResetLatch(&MyProc->waitLatch);
+               ResetLatch(&MyProc->procLatch);
 
                /*
                 * Try checking the state without the lock first.  There's no
@@ -247,11 +244,11 @@ SyncRepWaitForLSN(XLogRecPtr XactCommitLSN)
                }
 
                /*
-                * Wait on latch for up to 60 seconds. This allows us to check for
+                * Wait on latch for up to 10 seconds. This allows us to check for
                 * cancel/die signal or postmaster death regularly while waiting. Note
                 * that timeout here does not necessarily release from loop.
                 */
-               WaitLatch(&MyProc->waitLatch, 60000L);
+               WaitLatch(&MyProc->procLatch, 10000L);
        }
 
        /*
@@ -322,7 +319,7 @@ SyncRepCancelWait(void)
 }
 
 void
-SyncRepCleanupAtProcExit(int code, Datum arg)
+SyncRepCleanupAtProcExit(void)
 {
        if (!SHMQueueIsDetached(&(MyProc->syncRepLinks)))
        {
@@ -330,8 +327,6 @@ SyncRepCleanupAtProcExit(int code, Datum arg)
                SHMQueueDelete(&(MyProc->syncRepLinks));
                LWLockRelease(SyncRepLock);
        }
-
-       DisownLatch(&MyProc->waitLatch);
 }
 
 /*
@@ -560,9 +555,7 @@ SyncRepWakeQueue(bool all)
                /*
                 * Wake only when we have set state and removed from queue.
                 */
-               Assert(SHMQueueIsDetached(&(thisproc->syncRepLinks)));
-               Assert(thisproc->syncRepState == SYNC_REP_WAIT_COMPLETE);
-               SetLatch(&(thisproc->waitLatch));
+               SetLatch(&(thisproc->procLatch));
 
                numprocs++;
        }
index 471c844ab2a7d539467b80471c2209228fe57a7f..18b4727fa81fc661129440466d3b9d1061134fff 100644 (file)
@@ -373,11 +373,15 @@ WalRcvSigHupHandler(SIGNAL_ARGS)
 static void
 WalRcvShutdownHandler(SIGNAL_ARGS)
 {
+       int                     save_errno = errno;
+
        got_SIGTERM = true;
 
        /* Don't joggle the elbow of proc_exit */
        if (!proc_exit_inprogress && WalRcvImmediateInterruptOK)
                ProcessWalRcvInterrupts();
+
+       errno = save_errno;
 }
 
 /*
index aaa048525b4526c7615535e772710c40188138d7..bc7b3146b7bd6c7a363634b8a425a63feda1ad7d 100644 (file)
@@ -1188,18 +1188,26 @@ XLogSend(char *msgbuf, bool *caughtup)
 static void
 WalSndSigHupHandler(SIGNAL_ARGS)
 {
+       int                     save_errno = errno;
+
        got_SIGHUP = true;
        if (MyWalSnd)
                SetLatch(&MyWalSnd->latch);
+
+       errno = save_errno;
 }
 
 /* SIGTERM: set flag to shut down */
 static void
 WalSndShutdownHandler(SIGNAL_ARGS)
 {
+       int                     save_errno = errno;
+
        walsender_shutdown_requested = true;
        if (MyWalSnd)
                SetLatch(&MyWalSnd->latch);
+
+       errno = save_errno;
 }
 
 /*
@@ -1238,16 +1246,24 @@ WalSndQuickDieHandler(SIGNAL_ARGS)
 static void
 WalSndXLogSendHandler(SIGNAL_ARGS)
 {
+       int                     save_errno = errno;
+
        latch_sigusr1_handler();
+
+       errno = save_errno;
 }
 
 /* SIGUSR2: set flag to do a last cycle and shut down afterwards */
 static void
 WalSndLastCycleHandler(SIGNAL_ARGS)
 {
+       int                     save_errno = errno;
+
        walsender_ready_to_stop = true;
        if (MyWalSnd)
                SetLatch(&MyWalSnd->latch);
+
+       errno = save_errno;
 }
 
 /* Set up signal handlers */
index 9ef008e73317a5694521bbec22f8eb691d858b86..58d95bc0d6b5cee41e27f6073f3f33498b0df7d4 100644 (file)
@@ -187,7 +187,8 @@ InitProcGlobal(void)
        ProcGlobal->spins_per_delay = DEFAULT_SPINS_PER_DELAY;
 
        /*
-        * Pre-create the PGPROC structures and create a semaphore for each.
+        * Pre-create the PGPROC structures and create a semaphore and latch
+        * for each.
         */
        procs = (PGPROC *) ShmemAlloc((MaxConnections) * sizeof(PGPROC));
        if (!procs)
@@ -198,9 +199,9 @@ InitProcGlobal(void)
        for (i = 0; i < MaxConnections; i++)
        {
                PGSemaphoreCreate(&(procs[i].sem));
+               InitSharedLatch(&procs[i].procLatch);
                procs[i].links.next = (SHM_QUEUE *) ProcGlobal->freeProcs;
                ProcGlobal->freeProcs = &procs[i];
-               InitSharedLatch(&procs[i].waitLatch);
        }
 
        /*
@@ -217,9 +218,9 @@ InitProcGlobal(void)
        for (i = 0; i < autovacuum_max_workers + 1; i++)
        {
                PGSemaphoreCreate(&(procs[i].sem));
+               InitSharedLatch(&procs[i].procLatch);
                procs[i].links.next = (SHM_QUEUE *) ProcGlobal->autovacFreeProcs;
                ProcGlobal->autovacFreeProcs = &procs[i];
-               InitSharedLatch(&procs[i].waitLatch);
        }
 
        /*
@@ -230,7 +231,7 @@ InitProcGlobal(void)
        {
                AuxiliaryProcs[i].pid = 0;              /* marks auxiliary proc as not in use */
                PGSemaphoreCreate(&(AuxiliaryProcs[i].sem));
-               InitSharedLatch(&procs[i].waitLatch);
+               InitSharedLatch(&AuxiliaryProcs[i].procLatch);
        }
 
        /* Create ProcStructLock spinlock, too */
@@ -306,8 +307,8 @@ InitProcess(void)
                MarkPostmasterChildActive();
 
        /*
-        * Initialize all fields of MyProc, except for the semaphore which was
-        * prepared for us by InitProcGlobal.
+        * Initialize all fields of MyProc, except for the semaphore and latch,
+        * which were prepared for us by InitProcGlobal.
         */
        SHMQueueElemInit(&(MyProc->links));
        MyProc->waitStatus = STATUS_OK;
@@ -333,12 +334,17 @@ InitProcess(void)
                SHMQueueInit(&(MyProc->myProcLocks[i]));
        MyProc->recoveryConflictPending = false;
 
-       /* Initialise for sync rep */
+       /* Initialize fields for sync rep */
        MyProc->waitLSN.xlogid = 0;
        MyProc->waitLSN.xrecoff = 0;
        MyProc->syncRepState = SYNC_REP_NOT_WAITING;
        SHMQueueElemInit(&(MyProc->syncRepLinks));
-       OwnLatch(&MyProc->waitLatch);
+
+       /*
+        * Acquire ownership of the PGPROC's latch, so that we can use WaitLatch.
+        * Note that there's no particular need to do ResetLatch here.
+        */
+       OwnLatch(&MyProc->procLatch);
 
        /*
         * We might be reusing a semaphore that belonged to a failed process. So
@@ -379,7 +385,6 @@ InitProcessPhase2(void)
        /*
         * Arrange to clean that up at backend exit.
         */
-       on_shmem_exit(SyncRepCleanupAtProcExit, 0);
        on_shmem_exit(RemoveProcFromArray, 0);
 }
 
@@ -454,8 +459,8 @@ InitAuxiliaryProcess(void)
        SpinLockRelease(ProcStructLock);
 
        /*
-        * Initialize all fields of MyProc, except for the semaphore which was
-        * prepared for us by InitProcGlobal.
+        * Initialize all fields of MyProc, except for the semaphore and latch,
+        * which were prepared for us by InitProcGlobal.
         */
        SHMQueueElemInit(&(MyProc->links));
        MyProc->waitStatus = STATUS_OK;
@@ -475,6 +480,12 @@ InitAuxiliaryProcess(void)
        for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
                SHMQueueInit(&(MyProc->myProcLocks[i]));
 
+       /*
+        * Acquire ownership of the PGPROC's latch, so that we can use WaitLatch.
+        * Note that there's no particular need to do ResetLatch here.
+        */
+       OwnLatch(&MyProc->procLatch);
+
        /*
         * We might be reusing a semaphore that belonged to a failed process. So
         * be careful and reinitialize its value here.  (This is not strictly
@@ -677,6 +688,9 @@ ProcKill(int code, Datum arg)
 
        Assert(MyProc != NULL);
 
+       /* Make sure we're out of the sync rep lists */
+       SyncRepCleanupAtProcExit();
+
        /*
         * Release any LW locks I am holding.  There really shouldn't be any, but
         * it's cheap to check again before we cut the knees off the LWLock
@@ -684,6 +698,9 @@ ProcKill(int code, Datum arg)
         */
        LWLockReleaseAll();
 
+       /* Release ownership of the process's latch, too */
+       DisownLatch(&MyProc->procLatch);
+
        SpinLockAcquire(ProcStructLock);
 
        /* Return PGPROC structure (and semaphore) to appropriate freelist */
@@ -739,6 +756,9 @@ AuxiliaryProcKill(int code, Datum arg)
        /* Release any LW locks I am holding (see notes above) */
        LWLockReleaseAll();
 
+       /* Release ownership of the process's latch, too */
+       DisownLatch(&MyProc->procLatch);
+
        SpinLockAcquire(ProcStructLock);
 
        /* Mark auxiliary proc no longer in use */
index f035a48e9b4aa5e32c1b50926b68be0acc5bbf16..660a67412dcc53e3e474ef7c7baa6f3f37535988 100644 (file)
@@ -2643,11 +2643,12 @@ die(SIGNAL_ARGS)
                        InterruptHoldoffCount--;
                        ProcessInterrupts();
                }
-
-               /* Interrupt any sync rep wait which is currently in progress. */
-               SetLatch(&(MyProc->waitLatch));
        }
 
+       /* If we're still here, waken anything waiting on the process latch */
+       if (MyProc)
+               SetLatch(&MyProc->procLatch);
+
        errno = save_errno;
 }
 
@@ -2684,11 +2685,12 @@ StatementCancelHandler(SIGNAL_ARGS)
                        InterruptHoldoffCount--;
                        ProcessInterrupts();
                }
-
-               /* Interrupt any sync rep wait which is currently in progress. */
-               SetLatch(&(MyProc->waitLatch));
        }
 
+       /* If we're still here, waken anything waiting on the process latch */
+       if (MyProc)
+               SetLatch(&MyProc->procLatch);
+
        errno = save_errno;
 }
 
index efbebbcc06e38f731a323cde26c02d2302ef3a58..d71047e14703d68e21a945067fa9cf6739e98033 100644 (file)
@@ -33,8 +33,8 @@ extern char *SyncRepStandbyNames;
 /* called by user backend */
 extern void SyncRepWaitForLSN(XLogRecPtr XactCommitLSN);
 
-/* callback at backend exit */
-extern void SyncRepCleanupAtProcExit(int code, Datum arg);
+/* called at backend exit */
+extern void SyncRepCleanupAtProcExit(void);
 
 /* called by wal sender */
 extern void SyncRepInitConfig(void);
index af9c1292fc828ecf15095f9dc4d9733ae162887f..56758da04f259d39d931b493d405169f420343b0 100644 (file)
@@ -118,13 +118,14 @@ struct PGPROC
        LOCKMASK        heldLocks;              /* bitmask for lock types already held on this
                                                                 * lock object by this backend */
 
+       Latch           procLatch;              /* generic latch for process */
+
        /*
         * Info to allow us to wait for synchronous replication, if needed.
         * waitLSN is InvalidXLogRecPtr if not waiting; set only by user backend.
         * syncRepState must not be touched except by owning process or WALSender.
         * syncRepLinks used only while holding SyncRepLock.
         */
-       Latch           waitLatch;              /* allow us to wait for sync rep */
        XLogRecPtr      waitLSN;                /* waiting for this LSN or higher */
        int                     syncRepState;   /* wait state for sync rep */
        SHM_QUEUE       syncRepLinks;   /* list link if process is in syncrep queue */