resolution during Hot Standby. Page reuse interlock requested by Tom.
Analysis and patch by me.
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.118 2010/02/08 04:33:53 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.119 2010/02/13 00:59:58 sriggs Exp $
*
* NOTES
* Postgres btree pages look like ordinary relation pages. The opaque
errhint("Please REINDEX it.")));
}
+/*
+ * Log the reuse of a page from the FSM.
+ */
+static void
+_bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid)
+{
+ if (rel->rd_istemp)
+ return;
+
+ /* No ereport(ERROR) until changes are logged */
+ START_CRIT_SECTION();
+
+ /*
+ * We don't do MarkBufferDirty here because we're about initialise
+ * the page, and nobody else can see it yet.
+ */
+
+ /* XLOG stuff */
+ {
+ XLogRecPtr recptr;
+ XLogRecData rdata[1];
+ xl_btree_reuse_page xlrec_reuse;
+
+ xlrec_reuse.node = rel->rd_node;
+ xlrec_reuse.block = blkno;
+ xlrec_reuse.latestRemovedXid = latestRemovedXid;
+ rdata[0].data = (char *) &xlrec_reuse;
+ rdata[0].len = SizeOfBtreeReusePage;
+ rdata[0].buffer = InvalidBuffer;
+ rdata[0].next = NULL;
+
+ recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE, rdata);
+
+ /*
+ * We don't do PageSetLSN or PageSetTLI here because
+ * we're about initialise the page, so no need.
+ */
+ }
+
+ END_CRIT_SECTION();
+}
+
/*
* _bt_getbuf() -- Get a buffer by block number for read or write.
*
{
page = BufferGetPage(buf);
if (_bt_page_recyclable(page))
- {
+ {
+ /*
+ * If we are generating WAL for Hot Standby then create
+ * a WAL record that will allow us to conflict with
+ * queries running on standby.
+ */
+ if (XLogStandbyInfoActive())
+ {
+ BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+
+ _bt_log_reuse_page(rel, blkno, opaque->btpo.xact);
+ }
+
/* Okay to use page. Re-initialize and return it */
_bt_pageinit(page, BufferGetPageSize(buf));
return buf;
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.60 2010/02/08 04:33:53 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.61 2010/02/13 00:59:58 sriggs Exp $
*
*-------------------------------------------------------------------------
*/
{
uint8 info = record->xl_info & ~XLR_INFO_MASK;
- /*
- * Btree delete records can conflict with standby queries. You might
- * think that vacuum records would conflict as well, but we've handled
- * that already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid
- * cleaned by the vacuum of the heap and so we can resolve any conflicts
- * just once when that arrives. After that any we know that no conflicts
- * exist from individual btree vacuum records on that index.
- */
- if (InHotStandby && info == XLOG_BTREE_DELETE)
+ if (InHotStandby)
{
- xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
+ switch (info)
+ {
+ case XLOG_BTREE_DELETE:
+ /*
+ * Btree delete records can conflict with standby queries. You might
+ * think that vacuum records would conflict as well, but we've handled
+ * that already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid
+ * cleaned by the vacuum of the heap and so we can resolve any conflicts
+ * just once when that arrives. After that any we know that no conflicts
+ * exist from individual btree vacuum records on that index.
+ */
+ {
+ xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
- /*
- * XXX Currently we put everybody on death row, because
- * currently _bt_delitems() supplies InvalidTransactionId.
- * This can be fairly painful, so providing a better value
- * here is worth some thought and possibly some effort to
- * improve.
- */
- ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node);
+ /*
+ * XXX Currently we put everybody on death row, because
+ * currently _bt_delitems() supplies InvalidTransactionId.
+ * This can be fairly painful, so providing a better value
+ * here is worth some thought and possibly some effort to
+ * improve.
+ */
+ ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node);
+ }
+ break;
+
+ case XLOG_BTREE_REUSE_PAGE:
+ /*
+ * Btree reuse page records exist to provide a conflict point when we
+ * reuse pages in the index via the FSM. That's all it does though.
+ */
+ {
+ xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) XLogRecGetData(record);
+
+ ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node);
+ }
+ return;
+
+ default:
+ break;
+ }
}
/*
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.128 2010/02/08 04:33:54 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.129 2010/02/13 00:59:58 sriggs Exp $
*
*-------------------------------------------------------------------------
*/
#define XLOG_BTREE_DELETE_PAGE_HALF 0xB0 /* page deletion that makes
* parent half-dead */
#define XLOG_BTREE_VACUUM 0xC0 /* delete entries on a page during vacuum */
+#define XLOG_BTREE_REUSE_PAGE 0xD0 /* old page is about to be reused from FSM */
/*
* All that we need to find changed index tuple
#define SizeOfBtreeDelete (offsetof(xl_btree_delete, latestRemovedXid) + sizeof(TransactionId))
+/*
+ * This is what we need to know about page reuse within btree.
+ */
+typedef struct xl_btree_reuse_page
+{
+ RelFileNode node;
+ BlockNumber block;
+ TransactionId latestRemovedXid;
+} xl_btree_reuse_page;
+
+#define SizeOfBtreeReusePage (sizeof(xl_btree_reuse_page))
+
/*
* This is what we need to know about vacuum of individual leaf index tuples.
* The WAL record can represent deletion of any number of index tuples on a