granicus.if.org Git - postgresql/commitdiff
While waiting for a condition variable, detect postmaster death.
author: Tom Lane <tgl@sss.pgh.pa.us>
Tue, 9 Jan 2018 17:34:46 +0000 (12:34 -0500)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Tue, 9 Jan 2018 17:34:57 +0000 (12:34 -0500)
The general assumption for postmaster child processes is that they
should just exit(1), reasonably promptly, if the postmaster disappears.
condition_variable.c neglected this consideration and could be left
waiting forever, if the counterpart process it is waiting for has
done the right thing and exited.

We had some discussion of adjusting the WaitEventSet API to make it
harder to make this type of mistake in future; but for the moment,
and for v10, let's make this narrow fix.

Discussion: https://postgr.es/m/20412.1515456143@sss.pgh.pa.us

src/backend/storage/lmgr/condition_variable.c

index 25c5cd7b45b02cbae571daee3c15c50e0f687853..ef1d5baf0163231dfc98e110ad869588babd4dde 100644 (file)
@@ -69,9 +69,11 @@ ConditionVariablePrepareToSleep(ConditionVariable *cv)
        {
                WaitEventSet *new_event_set;
 
-               new_event_set = CreateWaitEventSet(TopMemoryContext, 1);
+               new_event_set = CreateWaitEventSet(TopMemoryContext, 2);
                AddWaitEventToSet(new_event_set, WL_LATCH_SET, PGINVALID_SOCKET,
                                                  MyLatch, NULL);
+               AddWaitEventToSet(new_event_set, WL_POSTMASTER_DEATH, PGINVALID_SOCKET,
+                                                 NULL, NULL);
                /* Don't set cv_wait_event_set until we have a correct WES. */
                cv_wait_event_set = new_event_set;
        }
@@ -149,11 +151,20 @@ ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
                CHECK_FOR_INTERRUPTS();
 
                /*
-                * Wait for latch to be set.  We don't care about the result because
-                * our contract permits spurious returns.
+                * Wait for latch to be set.  (If we're awakened for some other
+                * reason, the code below will cope anyway.)
                 */
                WaitEventSetWait(cv_wait_event_set, -1, &event, 1, wait_event_info);
 
+               if (event.events & WL_POSTMASTER_DEATH)
+               {
+                       /*
+                        * Emergency bailout if postmaster has died.  This is to avoid the
+                        * necessity for manual cleanup of all postmaster children.
+                        */
+                       exit(1);
+               }
+
                /* Reset latch before examining the state of the wait list. */
                ResetLatch(MyLatch);