]> granicus.if.org Git - postgresql/commitdiff
Perform a lot more sanity checks when freezing tuples.
authorAndres Freund <andres@anarazel.de>
Tue, 14 Nov 2017 02:45:47 +0000 (18:45 -0800)
committerAndres Freund <andres@anarazel.de>
Fri, 15 Dec 2017 02:20:48 +0000 (18:20 -0800)
The previous commit has shown that the sanity checks around freezing
aren't strong enough. Strengthening them seems especially important
because the existance of the bug has caused corruption that we don't
want to make even worse during future vacuum cycles.

The errors are emitted with ereport rather than elog, despite being
"should never happen" messages, so a proper error code is emitted. To
avoid superflous translations, mark messages as internal.

Author: Andres Freund and Alvaro Herrera
Reviewed-By: Alvaro Herrera, Michael Paquier
Discussion: https://postgr.es/m/20171102112019.33wb7g5wp4zpjelu@alap3.anarazel.de
Backpatch: 9.3-

src/backend/access/heap/heapam.c
src/backend/access/heap/rewriteheap.c
src/backend/commands/vacuumlazy.c
src/include/access/heapam.h
src/include/access/heapam_xlog.h

index f5db3c775c3417e3f7923d92ee5756adb5f68942..940aa26d120b57b1d430aeada788ab3a05caf3a4 100644 (file)
@@ -5440,6 +5440,7 @@ heap_inplace_update(Relation relation, HeapTuple tuple)
  */
 static TransactionId
 FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
+                                 TransactionId relfrozenxid, TransactionId relminmxid,
                                  TransactionId cutoff_xid, MultiXactId cutoff_multi,
                                  uint16 *flags)
 {
@@ -5466,15 +5467,25 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
                *flags |= FRM_INVALIDATE_XMAX;
                return InvalidTransactionId;
        }
+       else if (MultiXactIdPrecedes(multi, relminmxid))
+               ereport(ERROR,
+                               (errcode(ERRCODE_DATA_CORRUPTED),
+                                errmsg_internal("found multixact %u from before relminmxid %u",
+                                                                multi, relminmxid)));
        else if (MultiXactIdPrecedes(multi, cutoff_multi))
        {
                /*
-                * This old multi cannot possibly have members still running.  If it
-                * was a locker only, it can be removed without any further
-                * consideration; but if it contained an update, we might need to
-                * preserve it.
+                * This old multi cannot possibly have members still running, but
+                * verify just in case.  If it was a locker only, it can be removed
+                * without any further consideration; but if it contained an update, we
+                * might need to preserve it.
                 */
-               Assert(!MultiXactIdIsRunning(multi));
+               if (MultiXactIdIsRunning(multi))
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_DATA_CORRUPTED),
+                                        errmsg_internal("multixact %u from before cutoff %u found to be still running",
+                                                                        multi, cutoff_multi)));
+
                if (HEAP_XMAX_IS_LOCKED_ONLY(t_infomask))
                {
                        *flags |= FRM_INVALIDATE_XMAX;
@@ -5488,13 +5499,22 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
                        /* wasn't only a lock, xid needs to be valid */
                        Assert(TransactionIdIsValid(xid));
 
+                       if (TransactionIdPrecedes(xid, relfrozenxid))
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_DATA_CORRUPTED),
+                                                errmsg_internal("found update xid %u from before relfrozenxid %u",
+                                                                                xid, relfrozenxid)));
+
                        /*
                         * If the xid is older than the cutoff, it has to have aborted,
                         * otherwise the tuple would have gotten pruned away.
                         */
                        if (TransactionIdPrecedes(xid, cutoff_xid))
                        {
-                               Assert(!TransactionIdDidCommit(xid));
+                               if (TransactionIdDidCommit(xid))
+                                       ereport(ERROR,
+                                                       (errcode(ERRCODE_DATA_CORRUPTED),
+                                                        errmsg_internal("cannot freeze committed update xid %u", xid)));
                                *flags |= FRM_INVALIDATE_XMAX;
                                xid = InvalidTransactionId;             /* not strictly necessary */
                        }
@@ -5565,6 +5585,13 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
                {
                        TransactionId   xid = members[i].xid;
 
+                       Assert(TransactionIdIsValid(xid));
+                       if (TransactionIdPrecedes(xid, relfrozenxid))
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_DATA_CORRUPTED),
+                                                errmsg_internal("found update xid %u from before relfrozenxid %u",
+                                                                                xid, relfrozenxid)));
+
                        /*
                         * It's an update; should we keep it?  If the transaction is known
                         * aborted then it's okay to ignore it, otherwise not.  However,
@@ -5598,6 +5625,26 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
                                Assert(!TransactionIdIsValid(update_xid));
                                update_xid = xid;
                        }
+                       else
+                       {
+                               /*
+                                * Not in progress, not committed -- must be aborted or crashed;
+                                * we can ignore it.
+                                */
+                       }
+
+                       /*
+                        * Since the tuple wasn't marked HEAPTUPLE_DEAD by vacuum, the
+                        * update Xid cannot possibly be older than the xid cutoff. The
+                        * presence of such a tuple would cause corruption, so be paranoid
+                        * and check.
+                        */
+                       if (TransactionIdIsValid(update_xid) &&
+                               TransactionIdPrecedes(update_xid, cutoff_xid))
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_DATA_CORRUPTED),
+                                                errmsg_internal("found update xid %u from before xid cutoff %u",
+                                                                                update_xid, cutoff_xid)));
 
                        /*
                         * If we determined that it's an Xid corresponding to an update
@@ -5709,8 +5756,9 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
  * recovery.  We really need to remove old xids.
  */
 bool
-heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
-                                                 TransactionId cutoff_multi,
+heap_prepare_freeze_tuple(HeapTupleHeader tuple,
+                                                 TransactionId relfrozenxid, TransactionId relminmxid,
+                                                 TransactionId cutoff_xid, TransactionId cutoff_multi,
                                                  xl_heap_freeze_tuple *frz)
 
 {
@@ -5725,17 +5773,32 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
 
        /* Process xmin */
        xid = HeapTupleHeaderGetXmin(tuple);
-       if (TransactionIdIsNormal(xid) &&
-               TransactionIdPrecedes(xid, cutoff_xid))
+       if (TransactionIdIsNormal(xid))
        {
-               frz->frzflags |= XLH_FREEZE_XMIN;
+               if (TransactionIdPrecedes(xid, relfrozenxid))
+               {
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_DATA_CORRUPTED),
+                                        errmsg_internal("found xmin %u from before relfrozenxid %u",
+                                                                        xid, relfrozenxid)));
+               }
+               if (TransactionIdPrecedes(xid, cutoff_xid))
+               {
+                       if (!TransactionIdDidCommit(xid))
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_DATA_CORRUPTED),
+                                                errmsg_internal("uncommitted xmin %u from before xid cutoff %u needs to be frozen",
+                                                                                xid, cutoff_xid)));
 
-               /*
-                * Might as well fix the hint bits too; usually XMIN_COMMITTED will
-                * already be set here, but there's a small chance not.
-                */
-               frz->t_infomask |= HEAP_XMIN_COMMITTED;
-               changed = true;
+                       frz->frzflags |= XLH_FREEZE_XMIN;
+
+                       /*
+                        * Might as well fix the hint bits too; usually XMIN_COMMITTED will
+                        * already be set here, but there's a small chance not.
+                        */
+                       frz->t_infomask |= HEAP_XMIN_COMMITTED;
+                       changed = true;
+               }
        }
 
        /*
@@ -5755,6 +5818,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
                uint16          flags;
 
                newxmax = FreezeMultiXactId(xid, tuple->t_infomask,
+                                                                       relfrozenxid, relminmxid,
                                                                        cutoff_xid, cutoff_multi, &flags);
 
                if (flags & FRM_INVALIDATE_XMAX)
@@ -5800,10 +5864,30 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
                        Assert(flags & FRM_NOOP);
                }
        }
-       else if (TransactionIdIsNormal(xid) &&
-                        TransactionIdPrecedes(xid, cutoff_xid))
+       else if (TransactionIdIsNormal(xid))
        {
-               freeze_xmax = true;
+               if (TransactionIdPrecedes(xid, relfrozenxid))
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_DATA_CORRUPTED),
+                                        errmsg_internal("found xmax %u from before relfrozenxid %u",
+                                                                        xid, relfrozenxid)));
+
+               if (TransactionIdPrecedes(xid, cutoff_xid))
+               {
+                       /*
+                        * If we freeze xmax, make absolutely sure that it's not an XID
+                        * that is important.  (Note, a lock-only xmax can be removed
+                        * independent of committedness, since a committed lock holder has
+                        * released the lock).
+                        */
+                       if (!(tuple->t_infomask & HEAP_XMAX_LOCK_ONLY) &&
+                               TransactionIdDidCommit(xid))
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_DATA_CORRUPTED),
+                                                errmsg_internal("cannot freeze committed xmax %u",
+                                                                                xid)));
+                       freeze_xmax = true;
+               }
        }
 
        if (freeze_xmax)
@@ -5900,13 +5984,16 @@ heap_execute_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple *frz)
  * Useful for callers like CLUSTER that perform their own WAL logging.
  */
 bool
-heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
-                                 TransactionId cutoff_multi)
+heap_freeze_tuple(HeapTupleHeader tuple,
+                                 TransactionId relfrozenxid, TransactionId relminmxid,
+                                 TransactionId cutoff_xid, TransactionId cutoff_multi)
 {
        xl_heap_freeze_tuple frz;
        bool            do_freeze;
 
-       do_freeze = heap_prepare_freeze_tuple(tuple, cutoff_xid, cutoff_multi,
+       do_freeze = heap_prepare_freeze_tuple(tuple,
+                                                                                 relfrozenxid, relminmxid,
+                                                                                 cutoff_xid, cutoff_multi,
                                                                                  &frz);
 
        /*
index 82fa7b2d53aad16f2f0a0d69b511d025f46f9a33..d05e1ae262ad4cf54e2e91d70c7296d9f598bdaa 100644 (file)
@@ -348,7 +348,10 @@ rewrite_heap_tuple(RewriteState state,
         * While we have our hands on the tuple, we may as well freeze any
         * very-old xmin or xmax, so that future VACUUM effort can be saved.
         */
-       heap_freeze_tuple(new_tuple->t_data, state->rs_freeze_xid,
+       heap_freeze_tuple(new_tuple->t_data,
+                                         state->rs_old_rel->rd_rel->relfrozenxid,
+                                         state->rs_old_rel->rd_rel->relminmxid,
+                                         state->rs_freeze_xid,
                                          state->rs_cutoff_multi);
 
        /*
index 6ece3591e4f189b7019b193a7f64b3060bcb959f..e89c3a3bc7d4192f97de385b73255a5c0468738d 100644 (file)
@@ -420,6 +420,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
                                blkno;
        HeapTupleData tuple;
        char       *relname;
+       TransactionId relfrozenxid = onerel->rd_rel->relfrozenxid;
+       TransactionId relminmxid = onerel->rd_rel->relminmxid;
        BlockNumber empty_pages,
                                vacuumed_pages;
        double          num_tuples,
@@ -815,6 +817,13 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
                                         * tuple, we choose to keep it, because it'll be a lot
                                         * cheaper to get rid of it in the next pruning pass than
                                         * to treat it like an indexed tuple.
+                                        *
+                                        * If this were to happen for a tuple that actually needed
+                                        * to be deleted, we'd be in trouble, because it'd
+                                        * possibly leave a tuple below the relation's xmin
+                                        * horizon alive.  heap_prepare_freeze_tuple() is prepared
+                                        * to detect that case and abort the transaction,
+                                        * preventing corruption.
                                         */
                                        if (HeapTupleIsHotUpdated(&tuple) ||
                                                HeapTupleIsHeapOnly(&tuple))
@@ -904,8 +913,10 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
                                 * Each non-removable tuple must be checked to see if it needs
                                 * freezing.  Note we already have exclusive buffer lock.
                                 */
-                               if (heap_prepare_freeze_tuple(tuple.t_data, FreezeLimit,
-                                                                                 MultiXactCutoff, &frozen[nfrozen]))
+                               if (heap_prepare_freeze_tuple(tuple.t_data,
+                                                                                         relfrozenxid, relminmxid,
+                                                                                         FreezeLimit, MultiXactCutoff,
+                                                                                         &frozen[nfrozen]))
                                        frozen[nfrozen++].offset = offnum;
                        }
                }                                               /* scan along page */
index 4a187f899cf8dabaf0bdc6521ee0e0e47c8535ac..9b2b58b3a6f7b3c97fcbe17d2531dc24abbe9486 100644 (file)
@@ -146,8 +146,9 @@ extern HTSU_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
                                bool follow_update,
                                Buffer *buffer, HeapUpdateFailureData *hufd);
 extern void heap_inplace_update(Relation relation, HeapTuple tuple);
-extern bool heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
-                                 TransactionId cutoff_multi);
+extern bool heap_freeze_tuple(HeapTupleHeader tuple,
+                                 TransactionId relfrozenxid, TransactionId relminmxid,
+                                 TransactionId cutoff_xid, TransactionId cutoff_multi);
 extern bool heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid,
                                                MultiXactId cutoff_multi, Buffer buf);
 
index ea7016bf47d3b6e8dd4ba49842442387487a3ede..8c03e111c45d55db99b0d500851006b44e655bea 100644 (file)
@@ -311,6 +311,8 @@ extern XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer,
                                TransactionId cutoff_xid, xl_heap_freeze_tuple *tuples,
                                int ntuples);
 extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
+                                                 TransactionId relfrozenxid,
+                                                 TransactionId relminmxid,
                                                  TransactionId cutoff_xid,
                                                  TransactionId cutoff_multi,
                                                  xl_heap_freeze_tuple *frz);