Reduce severity of 'XLogFlush: request is not satisfied' error condition,

author Tom Lane <tgl@sss.pgh.pa.us>

Mon, 14 Jan 2002 17:55:57 +0000 (17:55 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Mon, 14 Jan 2002 17:55:57 +0000 (17:55 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Mon, 14 Jan 2002 17:55:57 +0000 (17:55 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Mon, 14 Jan 2002 17:55:57 +0000 (17:55 +0000)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c

index 46e3470ae384e6d514be676da39b27560f4b1694..a8e5e43a027e7ac756728ce90dc223659b732e17 100644 (file)
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.85 2001/12/28 18:16:41 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.86 2002/01/14 17:55:57 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1262,15 +1262,40 @@ XLogFlush(XLogRecPtr record)
                         WriteRqst.Write = WriteRqstPtr;
                         WriteRqst.Flush = record;
                         XLogWrite(WriteRqst);
-                       if (XLByteLT(LogwrtResult.Flush, record))
-                               elog(STOP, "XLogFlush: request %X/%X is not satisfied --- flushed only to %X/%X",
-                                        record.xlogid, record.xrecoff,
-                                 LogwrtResult.Flush.xlogid, LogwrtResult.Flush.xrecoff);
                 }
                 LWLockRelease(WALWriteLock);
         }
  
         END_CRIT_SECTION();
+
+       /*
+        * If we still haven't flushed to the request point then we have a
+        * problem; most likely, the requested flush point is past end of XLOG.
+        * This has been seen to occur when a disk page has a corrupted LSN.
+        *
+        * Formerly we treated this as a STOP condition, but that hurts the
+        * system's robustness rather than helping it: we do not want to take
+        * down the whole system due to corruption on one data page.  In
+        * particular, if the bad page is encountered again during recovery then
+        * we would be unable to restart the database at all!  (This scenario
+        * has actually happened in the field several times with 7.1 releases.
+        * Note that we cannot get here while InRedo is true, but if the bad
+        * page is brought in and marked dirty during recovery then
+        * CreateCheckpoint will try to flush it at the end of recovery.)
+        *
+        * The current approach is to ERROR under normal conditions, but only
+        * NOTICE during recovery, so that the system can be brought up even if
+        * there's a corrupt LSN.  Note that for calls from xact.c, the ERROR
+        * will be promoted to STOP since xact.c calls this routine inside a
+        * critical section.  However, calls from bufmgr.c are not within
+        * critical sections and so we will not force a restart for a bad LSN
+        * on a data page.
+        */
+       if (XLByteLT(LogwrtResult.Flush, record))
+               elog(InRecovery ? NOTICE : ERROR,
+                        "XLogFlush: request %X/%X is not satisfied --- flushed only to %X/%X",
+                        record.xlogid, record.xrecoff,
+                        LogwrtResult.Flush.xlogid, LogwrtResult.Flush.xrecoff);
  }
  
  /*
author	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 14 Jan 2002 17:55:57 +0000 (17:55 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 14 Jan 2002 17:55:57 +0000 (17:55 +0000)