granicus.if.org Git - postgresql/commitdiff
Prevent index-only scans from returning wrong answers under Hot Standby.
author: Robert Haas <rhaas@postgresql.org>
Fri, 27 Apr 2012 00:00:21 +0000 (20:00 -0400)
committer: Robert Haas <rhaas@postgresql.org>
Fri, 27 Apr 2012 00:00:21 +0000 (20:00 -0400)
The alternative of disallowing index-only scans in HS operation was
discussed, but the consensus was that it was better to treat marking
a page all-visible as a recovery conflict for snapshots that could still
fail to see XIDs on that page.  We may in the future try to soften this,
so that we simply force index scans to do heap fetches in cases where
this may be an issue, rather than throwing a hard conflict.

src/backend/access/heap/heapam.c
src/backend/access/heap/visibilitymap.c
src/backend/commands/vacuumlazy.c
src/include/access/heapam.h
src/include/access/htup.h
src/include/access/visibilitymap.h
src/include/access/xlog_internal.h

index 98d1e559d322e3ca14aa1e292777d59a8e0eebae..3259354d5e08fa16458b2af6a01e493e758e3203 100644 (file)
@@ -4368,7 +4368,8 @@ log_heap_freeze(Relation reln, Buffer buffer,
  * and dirtied.
  */
 XLogRecPtr
-log_heap_visible(RelFileNode rnode, BlockNumber block, Buffer vm_buffer)
+log_heap_visible(RelFileNode rnode, BlockNumber block, Buffer vm_buffer,
+                                TransactionId cutoff_xid)
 {
        xl_heap_visible xlrec;
        XLogRecPtr      recptr;
@@ -4376,6 +4377,7 @@ log_heap_visible(RelFileNode rnode, BlockNumber block, Buffer vm_buffer)
 
        xlrec.node = rnode;
        xlrec.block = block;
+       xlrec.cutoff_xid = cutoff_xid;
 
        rdata[0].data = (char *) &xlrec;
        rdata[0].len = SizeOfHeapVisible;
@@ -4708,6 +4710,17 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
                return;
        page = (Page) BufferGetPage(buffer);
 
+       /*
+        * If there are any Hot Standby transactions running that have an xmin
+        * horizon old enough that this page isn't all-visible for them, they
+        * might incorrectly decide that an index-only scan can skip a heap fetch.
+        *
+        * NB: It might be better to throw some kind of "soft" conflict here that
+        * forces any index-only scan that is in flight to perform heap fetches,
+        * rather than killing the transaction outright.
+        */
+       ResolveRecoveryConflictWithSnapshot(xlrec->cutoff_xid, xlrec->node);
+
        LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 
        /*
@@ -4760,7 +4773,8 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
                 * harm is done; and the next VACUUM will fix it.
                 */
                if (!XLByteLE(lsn, PageGetLSN(BufferGetPage(vmbuffer))))
-                       visibilitymap_set(reln, xlrec->block, lsn, vmbuffer);
+                       visibilitymap_set(reln, xlrec->block, lsn, vmbuffer,
+                                                         xlrec->cutoff_xid);
 
                ReleaseBuffer(vmbuffer);
                FreeFakeRelcacheEntry(reln);
index 6505e76daedde2f70092f00556b74c7eee1b491d..5696abe4d2a11cec7ab97e8525458dcc124dad39 100644 (file)
@@ -229,7 +229,9 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)
  * recptr is the LSN of the XLOG record we're replaying, if we're in recovery,
  * or InvalidXLogRecPtr in normal running.  The page LSN is advanced to the
  * one provided; in normal running, we generate a new XLOG record and set the
- * page LSN to that value.
+ * page LSN to that value.  cutoff_xid is the largest xmin on the page being
+ * marked all-visible; it is needed for Hot Standby, and can be
+ * InvalidTransactionId if the page contains no tuples.
  *
  * You must pass a buffer containing the correct map page to this function.
  * Call visibilitymap_pin first to pin the right one. This function doesn't do
@@ -237,7 +239,7 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)
  */
 void
 visibilitymap_set(Relation rel, BlockNumber heapBlk, XLogRecPtr recptr,
-                                 Buffer buf)
+                                 Buffer buf, TransactionId cutoff_xid)
 {
        BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
        uint32          mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
@@ -269,7 +271,8 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, XLogRecPtr recptr,
                if (RelationNeedsWAL(rel))
                {
                        if (XLogRecPtrIsInvalid(recptr))
-                               recptr = log_heap_visible(rel->rd_node, heapBlk, buf);
+                               recptr = log_heap_visible(rel->rd_node, heapBlk, buf,
+                                                                                 cutoff_xid);
                        PageSetLSN(page, recptr);
                        PageSetTLI(page, ThisTimeLineID);
                }
index 60171470d36fbba31b621ef15f12f517153fa440..0e0193d40e1818f4177bcb5bd68148c2739eb1ec 100644 (file)
@@ -448,6 +448,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
                bool            all_visible_according_to_vm;
                bool            all_visible;
                bool            has_dead_tuples;
+               TransactionId visibility_cutoff_xid = InvalidTransactionId;
 
                if (blkno == next_not_all_visible_block)
                {
@@ -627,7 +628,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
                        {
                                PageSetAllVisible(page);
                                MarkBufferDirty(buf);
-                               visibilitymap_set(onerel, blkno, InvalidXLogRecPtr, vmbuffer);
+                               visibilitymap_set(onerel, blkno, InvalidXLogRecPtr, vmbuffer,
+                                                                 InvalidTransactionId);
                        }
 
                        UnlockReleaseBuffer(buf);
@@ -759,6 +761,10 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
                                                        all_visible = false;
                                                        break;
                                                }
+
+                                               /* Track newest xmin on page. */
+                                               if (TransactionIdFollows(xmin, visibility_cutoff_xid))
+                                                       visibility_cutoff_xid = xmin;
                                        }
                                        break;
                                case HEAPTUPLE_RECENTLY_DEAD:
@@ -853,7 +859,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
                                PageSetAllVisible(page);
                                MarkBufferDirty(buf);
                        }
-                       visibilitymap_set(onerel, blkno, InvalidXLogRecPtr, vmbuffer);
+                       visibilitymap_set(onerel, blkno, InvalidXLogRecPtr, vmbuffer,
+                                                         visibility_cutoff_xid);
                }
 
                /*
index 9d5da7fb3a5981cef1d9f60cb17271a27f186798..d554392e5ae9722e8b77cf46e71cc13fb9f68736 100644 (file)
@@ -141,7 +141,7 @@ extern XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer,
                                TransactionId cutoff_xid,
                                OffsetNumber *offsets, int offcnt);
 extern XLogRecPtr log_heap_visible(RelFileNode rnode, BlockNumber block,
-                                Buffer vm_buffer);
+                                Buffer vm_buffer, TransactionId cutoff_xid);
 extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum,
                        BlockNumber blk, Page page);
 
index 6a3778d6508d6a52ae3bc8add42303b1b9dfd92a..8f65428ac4b59b62436564b39dea0b827745aa57 100644 (file)
@@ -788,9 +788,10 @@ typedef struct xl_heap_visible
 {
        RelFileNode node;
        BlockNumber block;
+       TransactionId cutoff_xid;
 } xl_heap_visible;
 
-#define SizeOfHeapVisible (offsetof(xl_heap_visible, block) + sizeof(BlockNumber))
+#define SizeOfHeapVisible (offsetof(xl_heap_visible, cutoff_xid) + sizeof(TransactionId))
 
 extern void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple,
                                                                           TransactionId *latestRemovedXid);
index 218ccd6104b05bd2ee6ca538cec3e99fb95713dc..5774e92e15fc9b0292577e637b194058a373dd1c 100644 (file)
@@ -25,7 +25,7 @@ extern void visibilitymap_pin(Relation rel, BlockNumber heapBlk,
                                  Buffer *vmbuf);
 extern bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf);
 extern void visibilitymap_set(Relation rel, BlockNumber heapBlk,
-                                 XLogRecPtr recptr, Buffer vmbuf);
+                                 XLogRecPtr recptr, Buffer vmbuf, TransactionId cutoff_xid);
 extern bool visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *vmbuf);
 extern BlockNumber visibilitymap_count(Relation rel);
 extern void visibilitymap_truncate(Relation rel, BlockNumber nheapblocks);
index c079a9aa8f555c87c10efaa484d77c331d3edebd..2020a3b41fe5d28ea72ba8fbce7cce0a73aefdf7 100644 (file)
@@ -71,7 +71,7 @@ typedef struct XLogContRecord
 /*
  * Each page of XLOG file has a header like this:
  */
-#define XLOG_PAGE_MAGIC 0xD070 /* can be used as WAL version indicator */
+#define XLOG_PAGE_MAGIC 0xD071 /* can be used as WAL version indicator */
 
 typedef struct XLogPageHeaderData
 {