granicus.if.org Git - postgresql/commitdiff
Teach RequestCheckpoint() to wait and retry a few times if it can't signal
author Tom Lane <tgl@sss.pgh.pa.us>
Sun, 23 Nov 2008 01:40:19 +0000 (01:40 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sun, 23 Nov 2008 01:40:19 +0000 (01:40 +0000)
the bgwriter immediately.  This covers the case where the bgwriter is still
starting up, as seen in a recent buildfarm failure.  In future it might also
assist with clean recovery after a bgwriter termination and restart ---
right now the postmaster treats early bgwriter exit as a system crash,
but that might not always be so.

src/backend/postmaster/bgwriter.c

index 7d25811afd473681736842ea41392a1faa32a9f1..03f8f7e30f26f2fceffbffb1093dd1fee236d3b6 100644 (file)
@@ -37,7 +37,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.53 2008/10/14 08:06:39 heikki Exp $
+ *       $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.54 2008/11/23 01:40:19 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -864,6 +864,7 @@ RequestCheckpoint(int flags)
 {
        /* use volatile pointer to prevent code rearrangement */
        volatile BgWriterShmemStruct *bgs = BgWriterShmem;
+       int                     ntries;
        int                     old_failed,
                                old_started;
 
@@ -905,15 +906,38 @@ RequestCheckpoint(int flags)
        SpinLockRelease(&bgs->ckpt_lck);
 
        /*
-        * Send signal to request checkpoint.  When not waiting, we consider
-        * failure to send the signal to be nonfatal.
+        * Send signal to request checkpoint.  It's possible that the bgwriter
+        * hasn't started yet, or is in process of restarting, so we will retry
+        * a few times if needed.  Also, if not told to wait for the checkpoint
+        * to occur, we consider failure to send the signal to be nonfatal and
+        * merely LOG it.
         */
-       if (BgWriterShmem->bgwriter_pid == 0)
-               elog((flags & CHECKPOINT_WAIT) ? ERROR : LOG,
-                        "could not request checkpoint because bgwriter not running");
-       if (kill(BgWriterShmem->bgwriter_pid, SIGINT) != 0)
-               elog((flags & CHECKPOINT_WAIT) ? ERROR : LOG,
-                        "could not signal for checkpoint: %m");
+       for (ntries = 0; ; ntries++)
+       {
+               if (BgWriterShmem->bgwriter_pid == 0)
+               {
+                       if (ntries >= 20)               /* max wait 2.0 sec */
+                       {
+                               elog((flags & CHECKPOINT_WAIT) ? ERROR : LOG,
+                                        "could not request checkpoint because bgwriter not running");
+                               break;
+                       }
+               }
+               else if (kill(BgWriterShmem->bgwriter_pid, SIGINT) != 0)
+               {
+                       if (ntries >= 20)               /* max wait 2.0 sec */
+                       {
+                               elog((flags & CHECKPOINT_WAIT) ? ERROR : LOG,
+                                        "could not signal for checkpoint: %m");
+                               break;
+                       }
+               }
+               else
+                       break;                          /* signal sent successfully */
+
+               CHECK_FOR_INTERRUPTS();
+               pg_usleep(100000L);             /* wait 0.1 sec, then retry */
+       }
 
        /*
         * If requested, wait for completion.  We detect completion according to