granicus.if.org Git - postgresql/commitdiff
Fix possible pg_basebackup failure on standby with "include WAL".
author: Robert Haas <rhaas@postgresql.org>
Thu, 27 Oct 2016 15:19:51 +0000 (11:19 -0400)
committer: Robert Haas <rhaas@postgresql.org>
Thu, 27 Oct 2016 15:34:28 +0000 (11:34 -0400)
If a restartpoint flushed no dirty buffers, it could fail to update
the minimum recovery point, leading to a minimum recovery point prior
to the starting REDO location.  perform_base_backup() would interpret
that as meaning that no WAL files at all needed to be included in the
backup, failing an internal sanity check.  To fix, have restartpoints
always update the minimum recovery point to just after the checkpoint
record itself, so that the file (or files) containing the checkpoint
record will always be included in the backup.

Code by Amit Kapila, per a design suggestion by me, with some
additional work on the code comment by me.  Test case by Michael
Paquier.  Report by Kyotaro Horiguchi.

src/backend/access/transam/xlog.c
src/test/recovery/t/001_stream_rep.pl

index 0b991bb91debbee4a6a336435999ebf54e4e0ec8..f66f940f7e0b59491d55c9a0c840a6bdfb29552a 100644 (file)
@@ -612,11 +612,14 @@ typedef struct XLogCtlData
 
        /*
         * During recovery, we keep a copy of the latest checkpoint record here.
-        * Used by the background writer when it wants to create a restartpoint.
+        * lastCheckPointRecPtr points to start of checkpoint record and
+        * lastCheckPointEndPtr points to end+1 of checkpoint record.  Used by the
+        * background writer when it wants to create a restartpoint.
         *
         * Protected by info_lck.
         */
        XLogRecPtr      lastCheckPointRecPtr;
+       XLogRecPtr      lastCheckPointEndPtr;
        CheckPoint      lastCheckPoint;
 
        /*
@@ -8691,6 +8694,7 @@ RecoveryRestartPoint(const CheckPoint *checkPoint)
         */
        SpinLockAcquire(&XLogCtl->info_lck);
        XLogCtl->lastCheckPointRecPtr = ReadRecPtr;
+       XLogCtl->lastCheckPointEndPtr = EndRecPtr;
        XLogCtl->lastCheckPoint = *checkPoint;
        SpinLockRelease(&XLogCtl->info_lck);
 }
@@ -8710,6 +8714,7 @@ bool
 CreateRestartPoint(int flags)
 {
        XLogRecPtr      lastCheckPointRecPtr;
+       XLogRecPtr      lastCheckPointEndPtr;
        CheckPoint      lastCheckPoint;
        XLogRecPtr      PriorRedoPtr;
        TimestampTz xtime;
@@ -8723,6 +8728,7 @@ CreateRestartPoint(int flags)
        /* Get a local copy of the last safe checkpoint record. */
        SpinLockAcquire(&XLogCtl->info_lck);
        lastCheckPointRecPtr = XLogCtl->lastCheckPointRecPtr;
+       lastCheckPointEndPtr = XLogCtl->lastCheckPointEndPtr;
        lastCheckPoint = XLogCtl->lastCheckPoint;
        SpinLockRelease(&XLogCtl->info_lck);
 
@@ -8826,6 +8832,27 @@ CreateRestartPoint(int flags)
                ControlFile->checkPoint = lastCheckPointRecPtr;
                ControlFile->checkPointCopy = lastCheckPoint;
                ControlFile->time = (pg_time_t) time(NULL);
+
+               /*
+                * Ensure minRecoveryPoint is past the checkpoint record.  Normally,
+                * this will have happened already while writing out dirty buffers,
+                * but not necessarily - e.g. because no buffers were dirtied.  We do
+                * this because a non-exclusive base backup uses minRecoveryPoint to
+                * determine which WAL files must be included in the backup, and the
+                * file (or files) containing the checkpoint record must be included,
+                * at a minimum. Note that for an ordinary restart of recovery there's
+                * no value in having the minimum recovery point any earlier than this
+                * anyway, because redo will begin just after the checkpoint record.
+                */
+               if (ControlFile->minRecoveryPoint < lastCheckPointEndPtr)
+               {
+                       ControlFile->minRecoveryPoint = lastCheckPointEndPtr;
+                       ControlFile->minRecoveryPointTLI = lastCheckPoint.ThisTimeLineID;
+
+                       /* update local copy */
+                       minRecoveryPoint = ControlFile->minRecoveryPoint;
+                       minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
+               }
                if (flags & CHECKPOINT_IS_SHUTDOWN)
                        ControlFile->state = DB_SHUTDOWNED_IN_RECOVERY;
                UpdateControlFile();
index fd71095f06ad37729b62a27dd84a0df973e9adbf..981c00ba3ec9a23abf364da69a7bb8e5dc5b4618 100644 (file)
@@ -24,6 +24,11 @@ $node_standby_1->start;
 # pg_basebackup works on a standby).
 $node_standby_1->backup($backup_name);
 
+# Take a second backup of the standby while the master is offline.
+$node_master->stop;
+$node_standby_1->backup('my_backup_2');
+$node_master->start;
+
 # Create second standby node linking to standby 1
 my $node_standby_2 = get_new_node('standby_2');
 $node_standby_2->init_from_backup($node_standby_1, $backup_name,