static XLogRecPtr ReadRecPtr; /* start of last record read */
static XLogRecPtr EndRecPtr; /* end+1 of last record read */
-static XLogRecPtr minRecoveryPoint; /* local copy of
- * ControlFile->minRecoveryPoint */
+/*
+ * Local copies of equivalent fields in the control file. When running
+ * crash recovery, minRecoveryPoint is set to InvalidXLogRecPtr as we
+ * expect to replay all the WAL available, and updateMinRecoveryPoint is
+ * switched to false to prevent any updates while replaying records.
+ * Those values are kept consistent as long as crash recovery runs.
+ */
+static XLogRecPtr minRecoveryPoint;
static TimeLineID minRecoveryPointTLI;
static bool updateMinRecoveryPoint = true;
if (!updateMinRecoveryPoint || (!force && lsn <= minRecoveryPoint))
return;
+ /*
+ * An invalid minRecoveryPoint means that we need to recover all the WAL,
+ * i.e., we're doing crash recovery. We never modify the control file's
+ * value in that case, so we can short-circuit future checks here too. The
+ * local values of minRecoveryPoint and minRecoveryPointTLI should not be
+ * updated until crash recovery finishes.
+ */
+ if (XLogRecPtrIsInvalid(minRecoveryPoint))
+ {
+ updateMinRecoveryPoint = false;
+ return;
+ }
+
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
/* update local copy */
minRecoveryPoint = ControlFile->minRecoveryPoint;
minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
- /*
- * An invalid minRecoveryPoint means that we need to recover all the WAL,
- * i.e., we're doing crash recovery. We never modify the control file's
- * value in that case, so we can short-circuit future checks here too.
- */
- if (minRecoveryPoint == 0)
- updateMinRecoveryPoint = false;
- else if (force || minRecoveryPoint < lsn)
+ if (force || minRecoveryPoint < lsn)
{
XLogRecPtr newMinRecoveryPoint;
TimeLineID newMinRecoveryPointTLI;
*/
if (RecoveryInProgress())
{
- /* Quick exit if already known updated */
+ /*
+ * An invalid minRecoveryPoint means that we need to recover all the
+ * WAL, i.e., we're doing crash recovery. We never modify the control
+ * file's value in that case, so we can short-circuit future checks
+ * here too.
+ */
+ if (XLogRecPtrIsInvalid(minRecoveryPoint))
+ updateMinRecoveryPoint = false;
+
+ /* Quick exit if already known to be updated or cannot be updated */
if (record <= minRecoveryPoint || !updateMinRecoveryPoint)
return false;
minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
LWLockRelease(ControlFileLock);
- /*
- * An invalid minRecoveryPoint means that we need to recover all the
- * WAL, i.e., we're doing crash recovery. We never modify the control
- * file's value in that case, so we can short-circuit future checks
- * here too.
- */
- if (minRecoveryPoint == 0)
- updateMinRecoveryPoint = false;
-
/* check again */
- if (record <= minRecoveryPoint || !updateMinRecoveryPoint)
- return false;
- else
- return true;
+ return record > minRecoveryPoint;
}
/* Quick exit if already known flushed */
minRecoveryPoint = ControlFile->minRecoveryPoint;
minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
+ /*
+ * The startup process can update its local copy of
+ * minRecoveryPoint from this point.
+ */
+ updateMinRecoveryPoint = true;
+
UpdateControlFile();
LWLockRelease(ControlFileLock);
/* No need to hold ControlFileLock yet, we aren't up far enough */
UpdateControlFile();
- /* initialize our local copy of minRecoveryPoint */
- minRecoveryPoint = ControlFile->minRecoveryPoint;
- minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
+ /*
+ * Initialize our local copy of minRecoveryPoint. When doing crash
+ * recovery we want to replay up to the end of WAL. In particular, for
+ * a promoted standby, the minRecoveryPoint value in the control file
+ * is only updated after the first checkpoint. If the instance crashes
+ * before that first post-recovery checkpoint completes, recovery
+ * would use a stale location from the control file, causing the
+ * startup process to think that there are still invalid page
+ * references when checking for data consistency.
+ */
+ if (InArchiveRecovery)
+ {
+ minRecoveryPoint = ControlFile->minRecoveryPoint;
+ minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
+ }
+ else
+ {
+ minRecoveryPoint = InvalidXLogRecPtr;
+ minRecoveryPointTLI = 0;
+ }
/*
* Reset pgstat data, because it may be invalid after recovery.
if (XLogRecPtrIsInvalid(minRecoveryPoint))
return;
+ Assert(InArchiveRecovery);
+
/*
* assume that we are called in the startup process, and hence don't need
* a lock to read lastReplayedEndRecPtr
* Update minRecoveryPoint to ensure that if recovery is aborted, we
* recover back up to this point before allowing hot standby again.
* This is important if the max_* settings are decreased, to ensure
- * you don't run queries against the WAL preceding the change.
+ * you don't run queries against the WAL preceding the change. The
+ * local copies cannot be updated as long as crash recovery is
+ * happening and we expect all the WAL to be replayed.
*/
- minRecoveryPoint = ControlFile->minRecoveryPoint;
- minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
- if (minRecoveryPoint != 0 && minRecoveryPoint < lsn)
+ if (InArchiveRecovery)
+ {
+ minRecoveryPoint = ControlFile->minRecoveryPoint;
+ minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
+ }
+ if (minRecoveryPoint != InvalidXLogRecPtr && minRecoveryPoint < lsn)
{
ControlFile->minRecoveryPoint = lsn;
ControlFile->minRecoveryPointTLI = ThisTimeLineID;
--- /dev/null
+# Test for promotion handling with WAL records generated post-promotion
+# before the first checkpoint is generated. This test case checks for
+# invalid page references at replay based on the minimum consistent
+# recovery point defined.
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 1;
+
+# Initialize primary node
+my $alpha = get_new_node('alpha');
+$alpha->init(allows_streaming => 1);
+# Setting wal_log_hints to off is important to get invalid page
+# references.
+$alpha->append_conf("postgresql.conf", <<EOF);
+wal_log_hints = off
+EOF
+
+# Start the primary
+$alpha->start;
+
+# Set up and start a standby
+$alpha->backup('bkp');
+my $bravo = get_new_node('bravo');
+$bravo->init_from_backup($alpha, 'bkp', has_streaming => 1);
+$bravo->append_conf('postgresql.conf', <<EOF);
+checkpoint_timeout=1h
+checkpoint_completion_target=0.9
+EOF
+$bravo->start;
+
+# Dummy table for the upcoming tests.
+$alpha->safe_psql('postgres', 'create table test1 (a int)');
+$alpha->safe_psql('postgres', 'insert into test1 select generate_series(1, 10000)');
+
+# take a checkpoint
+$alpha->safe_psql('postgres', 'checkpoint');
+
+# The following vacuum will set visibility map bits and create
+# problematic WAL records.
+$alpha->safe_psql('postgres', 'vacuum verbose test1');
+# Wait for last record to have been replayed on the standby.
+$alpha->wait_for_catchup($bravo, 'replay',
+ $alpha->lsn('insert'));
+
+# Now force a checkpoint on the standby. This may seem unnecessary, but for
+# some reason the previous checkpoint on the primary is not reflected on the
+# standby; without an explicit checkpoint, redo recovery may start from a much
+# older point, which includes even the table creation and initial page additions.
+$bravo->safe_psql('postgres', 'checkpoint');
+
+# Now just use a dummy table and run some operations to move minRecoveryPoint
+# beyond the previous vacuum.
+$alpha->safe_psql('postgres', 'create table test2 (a int, b text)');
+$alpha->safe_psql('postgres', 'insert into test2 select generate_series(1,10000), md5(random()::text)');
+$alpha->safe_psql('postgres', 'truncate test2');
+
+# Wait again for all records to be replayed.
+$alpha->wait_for_catchup($bravo, 'replay',
+ $alpha->lsn('insert'));
+
+# Do the promotion, which reinitializes minRecoveryPoint in the control
+# file so that WAL is replayed up to the end.
+$bravo->promote;
+
+# Truncate the table on the promoted standby, vacuum and extend it
+# again to create new page references. The first post-recovery checkpoint
+# has not happened yet.
+$bravo->safe_psql('postgres', 'truncate test1');
+$bravo->safe_psql('postgres', 'vacuum verbose test1');
+$bravo->safe_psql('postgres', 'insert into test1 select generate_series(1,1000)');
+
+# Now crash-stop the promoted standby and restart. This makes sure that
+# replay does not see invalid page references because of an invalid
+# minimum consistent recovery point.
+$bravo->stop('immediate');
+$bravo->start;
+
+# Check state of the table after full crash recovery. All its data should
+# be here.
+my $psql_out;
+$bravo->psql(
+ 'postgres',
+ "SELECT count(*) FROM test1",
+ stdout => \$psql_out);
+is($psql_out, '1000', "Check that table state is correct");