]> granicus.if.org Git - postgresql/commitdiff
Fix pruning of locked and updated tuples.
authorAndres Freund <andres@anarazel.de>
Fri, 3 Nov 2017 14:52:29 +0000 (07:52 -0700)
committerAndres Freund <andres@anarazel.de>
Fri, 15 Dec 2017 02:20:48 +0000 (18:20 -0800)
Previously it was possible that a tuple was not pruned during vacuum,
even though its update xmax (i.e. the updating xid in a multixact with
both key share lockers and an updater) was below the cutoff horizon.

As the freezing code assumed, rightly so, that that's not supposed to
happen, xmax would be preserved (as a member of a new multixact or
xmax directly). That causes two problems: For one the tuple is below
the xmin horizon, which can cause problems if the clog is truncated or
once there's an xid wraparound. The bigger problem is that that will
break HOT chains, which in turn can lead two to breakages: First,
failing index lookups, which in turn can e.g lead to constraints being
violated. Second, future hot prunes / vacuums can end up making
invisible tuples visible again. There's other harmful scenarios.

Fix the problem by recognizing that tuples can be DEAD instead of
RECENTLY_DEAD, even if the multixactid has alive members, if the
update_xid is below the xmin horizon. That's safe because newer
versions of the tuple will contain the locking xids.

A followup commit will harden the code somewhat against future similar
bugs and already corrupted data.

Author: Andres Freund, with changes by Alvaro Herrera
Reported-By: Daniel Wood
Analyzed-By: Andres Freund, Alvaro Herrera, Robert Haas, Peter
   Geoghegan, Daniel Wood, Yi Wen Wong, Michael Paquier
Reviewed-By: Alvaro Herrera, Robert Haas, Michael Paquier
Discussion:
    https://postgr.es/m/E5711E62-8FDF-4DCA-A888-C200BF6B5742@amazon.com
    https://postgr.es/m/20171102112019.33wb7g5wp4zpjelu@alap3.anarazel.de
Backpatch: 9.3-

src/backend/utils/time/tqual.c
src/test/isolation/expected/freeze-the-dead.out
src/test/isolation/isolation_schedule
src/test/isolation/specs/freeze-the-dead.spec

index a9413179dd37be0bc543709c83973914ba4e06e4..eb6cc3dcedb9abee27f3809e99d432157b3c4f8d 100644 (file)
@@ -1309,49 +1309,40 @@ HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin,
 
        if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
        {
-               TransactionId xmax;
-
-               if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
-               {
-                       /* already checked above */
-                       Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
-
-                       xmax = HeapTupleGetUpdateXid(tuple);
-
-                       /* not LOCKED_ONLY, so it has to have an xmax */
-                       Assert(TransactionIdIsValid(xmax));
-
-                       if (TransactionIdIsInProgress(xmax))
-                               return HEAPTUPLE_DELETE_IN_PROGRESS;
-                       else if (TransactionIdDidCommit(xmax))
-                               /* there are still lockers around -- can't return DEAD here */
-                               return HEAPTUPLE_RECENTLY_DEAD;
-                       /* updating transaction aborted */
-                       return HEAPTUPLE_LIVE;
-               }
-
-               Assert(!(tuple->t_infomask & HEAP_XMAX_COMMITTED));
+               TransactionId xmax = HeapTupleGetUpdateXid(tuple);
 
-               xmax = HeapTupleGetUpdateXid(tuple);
+               /* already checked above */
+               Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
 
                /* not LOCKED_ONLY, so it has to have an xmax */
                Assert(TransactionIdIsValid(xmax));
 
-               /* multi is not running -- updating xact cannot be */
-               Assert(!TransactionIdIsInProgress(xmax));
-               if (TransactionIdDidCommit(xmax))
+               if (TransactionIdIsInProgress(xmax))
+                       return HEAPTUPLE_DELETE_IN_PROGRESS;
+               else if (TransactionIdDidCommit(xmax))
                {
+                       /*
+                        * The multixact might still be running due to lockers.  If the
+                        * updater is below the xid horizon, we have to return DEAD
+                        * regardless -- otherwise we could end up with a tuple where the
+                        * updater has to be removed due to the horizon, but is not pruned
+                        * away.  It's not a problem to prune that tuple, because any
+                        * remaining lockers will also be present in newer tuple versions.
+                        */
                        if (!TransactionIdPrecedes(xmax, OldestXmin))
                                return HEAPTUPLE_RECENTLY_DEAD;
-                       else
-                               return HEAPTUPLE_DEAD;
+
+                       return HEAPTUPLE_DEAD;
+               }
+               else if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
+               {
+                       /*
+                        * Not in Progress, Not Committed, so either Aborted or crashed.
+                        * Mark the Xmax as invalid.
+                        */
+                       SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
                }
 
-               /*
-                * Not in Progress, Not Committed, so either Aborted or crashed.
-                * Remove the Xmax.
-                */
-               SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
                return HEAPTUPLE_LIVE;
        }
 
index dd045613f9397e668843636f51d0074fb4f44753..8e638f132f955fba3813d9956bd59ce211e37fc7 100644 (file)
-Parsed test spec with 2 sessions
+Parsed test spec with 3 sessions
 
-starting permutation: s1_update s1_commit s1_vacuum s2_key_share s2_commit
+starting permutation: s1_begin s2_begin s3_begin s1_update s2_key_share s3_key_share s1_update s1_commit s2_commit s2_vacuum s1_selectone s3_commit s2_vacuum s1_selectall
+step s1_begin: BEGIN;
+step s2_begin: BEGIN;
+step s3_begin: BEGIN;
 step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3;
-step s1_commit: COMMIT;
-step s1_vacuum: VACUUM FREEZE tab_freeze;
 step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE;
 id             
 
 3              
-step s2_commit: COMMIT;
-
-starting permutation: s1_update s1_commit s2_key_share s1_vacuum s2_commit
-step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3;
-step s1_commit: COMMIT;
-step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE;
+step s3_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE;
 id             
 
 3              
-step s1_vacuum: VACUUM FREEZE tab_freeze;
-step s2_commit: COMMIT;
-
-starting permutation: s1_update s1_commit s2_key_share s2_commit s1_vacuum
 step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3;
 step s1_commit: COMMIT;
-step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE;
-id             
-
-3              
 step s2_commit: COMMIT;
-step s1_vacuum: VACUUM FREEZE tab_freeze;
+step s2_vacuum: VACUUM FREEZE tab_freeze;
+step s1_selectone: 
+    BEGIN;
+    SET LOCAL enable_seqscan = false;
+    SET LOCAL enable_bitmapscan = false;
+    SELECT * FROM tab_freeze WHERE id = 3;
+    COMMIT;
 
-starting permutation: s1_update s2_key_share s1_commit s1_vacuum s2_commit
-step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3;
-step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE;
-id             
+id             name           x              
 
-3              
-step s1_commit: COMMIT;
-step s1_vacuum: VACUUM FREEZE tab_freeze;
-step s2_commit: COMMIT;
-
-starting permutation: s1_update s2_key_share s1_commit s2_commit s1_vacuum
-step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3;
-step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE;
-id             
-
-3              
-step s1_commit: COMMIT;
-step s2_commit: COMMIT;
-step s1_vacuum: VACUUM FREEZE tab_freeze;
-
-starting permutation: s1_update s2_key_share s2_commit s1_commit s1_vacuum
-step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3;
-step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE;
-id             
+3              333            2              
+step s3_commit: COMMIT;
+step s2_vacuum: VACUUM FREEZE tab_freeze;
+step s1_selectall: SELECT * FROM tab_freeze ORDER BY name, id;
+id             name           x              
 
-3              
-step s2_commit: COMMIT;
-step s1_commit: COMMIT;
-step s1_vacuum: VACUUM FREEZE tab_freeze;
-
-starting permutation: s2_key_share s1_update s1_commit s1_vacuum s2_commit
-step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE;
-id             
-
-3              
-step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3;
-step s1_commit: COMMIT;
-step s1_vacuum: VACUUM FREEZE tab_freeze;
-step s2_commit: COMMIT;
-
-starting permutation: s2_key_share s1_update s1_commit s2_commit s1_vacuum
-step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE;
-id             
-
-3              
-step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3;
-step s1_commit: COMMIT;
-step s2_commit: COMMIT;
-step s1_vacuum: VACUUM FREEZE tab_freeze;
-
-starting permutation: s2_key_share s1_update s2_commit s1_commit s1_vacuum
-step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE;
-id             
-
-3              
-step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3;
-step s2_commit: COMMIT;
-step s1_commit: COMMIT;
-step s1_vacuum: VACUUM FREEZE tab_freeze;
-
-starting permutation: s2_key_share s2_commit s1_update s1_commit s1_vacuum
-step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE;
-id             
-
-3              
-step s2_commit: COMMIT;
-step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3;
-step s1_commit: COMMIT;
-step s1_vacuum: VACUUM FREEZE tab_freeze;
+1              111            0              
+3              333            2              
index 8e404b7a355e861d8e16711444d881ed9251ba15..f1af282853f09b60ecd4e196920289ea1ebaade3 100644 (file)
@@ -41,6 +41,7 @@ test: update-locked-tuple
 test: propagate-lock-delete
 test: tuplelock-conflict
 test: tuplelock-update
+test: freeze-the-dead
 test: nowait
 test: nowait-2
 test: nowait-3
index 3cd9965b2fa8515727aaaf7f5e5d1c61fd71fd36..e24d7d5d116b2e96672dd44962ff8d665912a345 100644 (file)
@@ -16,12 +16,44 @@ teardown
 }
 
 session "s1"
-setup                          { BEGIN; }
+step "s1_begin"                { BEGIN; }
 step "s1_update"       { UPDATE tab_freeze SET x = x + 1 WHERE id = 3; }
 step "s1_commit"       { COMMIT; }
 step "s1_vacuum"       { VACUUM FREEZE tab_freeze; }
+step "s1_selectone"    {
+    BEGIN;
+    SET LOCAL enable_seqscan = false;
+    SET LOCAL enable_bitmapscan = false;
+    SELECT * FROM tab_freeze WHERE id = 3;
+    COMMIT;
+}
+step "s1_selectall"    { SELECT * FROM tab_freeze ORDER BY name, id; }
+step "s1_reindex"      { REINDEX TABLE tab_freeze; }
 
 session "s2"
-setup                          { BEGIN; }
+step "s2_begin"                { BEGIN; }
 step "s2_key_share"    { SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE; }
 step "s2_commit"       { COMMIT; }
+step "s2_vacuum"       { VACUUM FREEZE tab_freeze; }
+
+session "s3"
+step "s3_begin"                { BEGIN; }
+step "s3_key_share"    { SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE; }
+step "s3_commit"       { COMMIT; }
+step "s3_vacuum"       { VACUUM FREEZE tab_freeze; }
+
+# This permutation verfies that a previous bug
+#     https://postgr.es/m/E5711E62-8FDF-4DCA-A888-C200BF6B5742@amazon.com
+#     https://postgr.es/m/20171102112019.33wb7g5wp4zpjelu@alap3.anarazel.de
+# is not reintroduced. We used to make wrong pruning / freezing
+# decision for multixacts, which could lead to a) broken hot chains b)
+# dead rows being revived.
+permutation "s1_begin" "s2_begin" "s3_begin" # start transactions
+   "s1_update" "s2_key_share" "s3_key_share" # have xmax be a multi with an updater, updater being oldest xid
+   "s1_update" # create additional row version that has multis
+   "s1_commit" "s2_commit" # commit both updater and share locker
+   "s2_vacuum" # due to bug in freezing logic, we used to *not* prune updated row, and then froze it
+   "s1_selectone" # if hot chain is broken, the row can't be found via index scan
+   "s3_commit" # commit remaining open xact
+   "s2_vacuum" # pruning / freezing in broken hot chains would unset xmax, reviving rows
+   "s1_selectall" # show borkedness