* None of this code is used during normal system operation.
*
*
- * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.70 2010/02/09 21:43:29 tgl Exp $
+ * src/backend/access/transam/xlogutils.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
+#include "access/xlog.h"
#include "access/xlogutils.h"
#include "catalog/catalog.h"
-#include "storage/bufmgr.h"
#include "storage/smgr.h"
#include "utils/guc.h"
#include "utils/hsearch.h"
static HTAB *invalid_page_tab = NULL;
+/*
+ * Emit a message about a reference to an invalid page.
+ *
+ * elevel  - elog level at which the message is issued
+ * node    - relation file node holding the page
+ * forkno  - fork of the relation holding the page
+ * blkno   - block number of the page
+ * present - true if the page exists but is uninitialized, false if the
+ *           page does not exist at all
+ *
+ * The relation's path is obtained from relpathperm() and released again
+ * once the message has been issued.
+ */
+static void
+report_invalid_page(int elevel, RelFileNode node, ForkNumber forkno,
+					BlockNumber blkno, bool present)
+{
+	char	   *relname = relpathperm(node, forkno);
+
+	if (!present)
+		elog(elevel, "page %u of relation %s does not exist",
+			 blkno, relname);
+	else
+		elog(elevel, "page %u of relation %s is uninitialized",
+			 blkno, relname);
+
+	pfree(relname);
+}
+
/* Log a reference to an invalid page */
static void
log_invalid_page(RelFileNode node, ForkNumber forkno, BlockNumber blkno,
xl_invalid_page *hentry;
bool found;
+ /*
+ * Once recovery has reached a consistent state, the invalid-page table
+ * should be empty and remain so. If a reference to an invalid page is
+ * found after consistency is reached, PANIC immediately. This might seem
+ * aggressive, but it's better than letting the invalid reference linger
+ * in the hash table until the end of recovery and PANIC there, which
+ * might come only much later if this is a standby server.
+ */
+ if (reachedConsistency)
+ {
+ report_invalid_page(WARNING, node, forkno, blkno, present);
+ elog(PANIC, "WAL contains references to invalid pages");
+ }
+
/*
* Log references to invalid pages at DEBUG1 level. This allows some
* tracing of the cause (note the elog context mechanism will tell us
* something about the XLOG record that generated the reference).
*/
if (log_min_messages <= DEBUG1 || client_min_messages <= DEBUG1)
- {
- char *path = relpath(node, forkno);
-
- if (present)
- elog(DEBUG1, "page %u of relation %s is uninitialized",
- blkno, path);
- else
- elog(DEBUG1, "page %u of relation %s does not exist",
- blkno, path);
- pfree(path);
- }
+ report_invalid_page(DEBUG1, node, forkno, blkno, present);
if (invalid_page_tab == NULL)
{
{
if (log_min_messages <= DEBUG2 || client_min_messages <= DEBUG2)
{
- char *path = relpath(hentry->key.node, forkno);
+ char *path = relpathperm(hentry->key.node, forkno);
elog(DEBUG2, "page %u of relation %s has been dropped",
hentry->key.blkno, path);
{
if (log_min_messages <= DEBUG2 || client_min_messages <= DEBUG2)
{
- char *path = relpath(hentry->key.node, hentry->key.forkno);
+ char *path = relpathperm(hentry->key.node, hentry->key.forkno);
elog(DEBUG2, "page %u of relation %s has been dropped",
hentry->key.blkno, path);
}
}
+/*
+ * Report whether any references to invalid pages are still unresolved.
+ *
+ * Returns true only when the invalid-page hash table has been created and
+ * currently holds at least one entry; otherwise returns false.
+ */
+bool
+XLogHaveInvalidPages(void)
+{
+	/* No table means nothing was ever recorded */
+	if (invalid_page_tab == NULL)
+		return false;
+
+	return hash_get_num_entries(invalid_page_tab) > 0;
+}
+
/* Complain about any remaining invalid-page entries */
void
XLogCheckInvalidPages(void)
*/
while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
{
- char *path = relpath(hentry->key.node, hentry->key.forkno);
-
- if (hentry->present)
- elog(WARNING, "page %u of relation %s was uninitialized",
- hentry->key.blkno, path);
- else
- elog(WARNING, "page %u of relation %s did not exist",
- hentry->key.blkno, path);
- pfree(path);
+ report_invalid_page(WARNING, hentry->key.node, hentry->key.forkno,
+ hentry->key.blkno, hentry->present);
foundone = true;
}
* LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE), for reading from the main
* fork.
*
- * (Getting the lock is not really necessary, since we expect that this is
- * only used during single-process XLOG replay, but some subroutines such
- * as MarkBufferDirty will complain if we don't. And hopefully we'll get
- * hot standby support in the future, where there will be backends running
- * read-only queries during XLOG replay.)
+ * (Getting the buffer lock is not really necessary during single-process
+ * crash recovery, but some subroutines such as MarkBufferDirty will complain
+ * if we don't have the lock. In hot standby mode it's definitely necessary.)
*
* The returned buffer is exclusively-locked.
*
*
* In RBM_ZERO and RBM_ZERO_ON_ERROR modes, if the page doesn't exist, the
* relation is extended with all-zeroes pages up to the given block number.
+ *
+ * In RBM_NORMAL_NO_LOG mode, we return InvalidBuffer if the page doesn't
+ * exist, and we don't check for all-zeroes. Thus, no log entry is made
+ * to imply that the page should be dropped or truncated later.
*/
Buffer
XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
Assert(blkno != P_NEW);
/* Open the relation at smgr level */
- smgr = smgropen(rnode);
+ smgr = smgropen(rnode, InvalidBackendId);
/*
* Create the target file if it doesn't already exist. This lets us cope
if (blkno < lastblock)
{
/* page exists in file */
- buffer = ReadBufferWithoutRelcache(rnode, false, forknum, blkno,
+ buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno,
mode, NULL);
}
else
log_invalid_page(rnode, forknum, blkno, false);
return InvalidBuffer;
}
+ if (mode == RBM_NORMAL_NO_LOG)
+ return InvalidBuffer;
/* OK to extend the file */
/* we do this in recovery only - no rel-extension lock needed */
Assert(InRecovery);
buffer = InvalidBuffer;
- while (blkno >= lastblock)
+ do
{
if (buffer != InvalidBuffer)
ReleaseBuffer(buffer);
- buffer = ReadBufferWithoutRelcache(rnode, false, forknum,
+ buffer = ReadBufferWithoutRelcache(rnode, forknum,
P_NEW, mode, NULL);
- lastblock++;
}
- Assert(BufferGetBlockNumber(buffer) == blkno);
+ while (BufferGetBlockNumber(buffer) < blkno);
+ /* Handle the corner case that P_NEW returns non-consecutive pages */
+ if (BufferGetBlockNumber(buffer) != blkno)
+ {
+ ReleaseBuffer(buffer);
+ buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno,
+ mode, NULL);
+ }
}
if (mode == RBM_NORMAL)
FakeRelCacheEntry fakeentry;
Relation rel;
+ Assert(InRecovery);
+
/* Allocate the Relation struct and all related space in one block. */
fakeentry = palloc0(sizeof(FakeRelCacheEntryData));
rel = (Relation) fakeentry;
rel->rd_rel = &fakeentry->pgc;
rel->rd_node = rnode;
+ /* We will never be working with temp rels during recovery */
+ rel->rd_backend = InvalidBackendId;
+
+ /* It must be a permanent table if we're in recovery. */
+ rel->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
/* We don't know the name of the relation; use relfilenode instead */
sprintf(RelationGetRelationName(rel), "%u", rnode.relNode);
void
FreeFakeRelcacheEntry(Relation fakerel)
{
+ /*
+  * Release a fake relcache entry.
+  *
+  * Make sure the fakerel is not referenced by the SmgrRelation anymore:
+  * when rd_smgr is set, its owner link is cleared before the entry itself
+  * is freed below.
+  *
+  * NOTE(review): presumably the smgr entry would otherwise keep pointing
+  * at fakerel->rd_smgr after the pfree -- confirm against smgrclearowner().
+  */
+ if (fakerel->rd_smgr != NULL)
+ smgrclearowner(&fakerel->rd_smgr, fakerel->rd_smgr);
pfree(fakerel);
}