From 20b655224249e6d2daf7ef0595995228baddb381 Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Thu, 28 Sep 2017 16:44:01 +0200 Subject: [PATCH] Fix freezing of a dead HOT-updated tuple MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Vacuum calls page-level HOT prune to remove dead HOT tuples before doing liveness checks (HeapTupleSatisfiesVacuum) on the remaining tuples. But concurrent transaction commit/abort may turn DEAD some of the HOT tuples that survived the prune, before HeapTupleSatisfiesVacuum tests them. This happens to activate the code that decides to freeze the tuple ... which resuscitates it, duplicating data. (This is especially bad if there's any unique constraints, because those are now internally violated due to the duplicate entries, though you won't know until you try to REINDEX or dump/restore the table.) One possible fix would be to simply skip doing anything to the tuple, and hope that the next HOT prune would remove it. But there is a problem: if the tuple is older than freeze horizon, this would leave an unfrozen XID behind, and if no HOT prune happens to clean it up before the containing pg_clog segment is truncated away, it'd later cause an error when the XID is looked up. Fix the problem by having the tuple freezing routines cope with the situation: don't freeze the tuple (and keep it dead). In the cases that the XID is older than the freeze age, set the HEAP_XMAX_COMMITTED flag so that there is no need to look up the XID in pg_clog later on. An isolation test is included, authored by Michael Paquier, loosely based on Daniel Wood's original reproducer. It only tests one particular scenario, though, not all the possible ways for this problem to surface; it be good to have a more reliable way to test this more fully, but it'd require more work. In message https://postgr.es/m/20170911140103.5akxptyrwgpc25bw@alvherre.pgsql I outlined another test case (more closely matching Dan Wood's) that exposed a few more ways for the problem to occur. Backpatch all the way back to 9.3, where this problem was introduced by multixact juggling. In branches 9.3 and 9.4, this includes a backpatch of commit e5ff9fefcd50 (of 9.5 era), since the original is not correctable without matching the coding pattern in 9.5 up. Reported-by: Daniel Wood Diagnosed-by: Daniel Wood Reviewed-by: Yi Wen Wong, Michaël Paquier Discussion: https://postgr.es/m/E5711E62-8FDF-4DCA-A888-C200BF6B5742@amazon.com --- src/backend/access/heap/heapam.c | 57 +++++++--- src/backend/commands/vacuumlazy.c | 20 ++-- .../isolation/expected/freeze-the-dead.out | 101 ++++++++++++++++++ src/test/isolation/isolation_schedule | 1 + src/test/isolation/specs/freeze-the-dead.spec | 27 +++++ 5 files changed, 179 insertions(+), 27 deletions(-) create mode 100644 src/test/isolation/expected/freeze-the-dead.out create mode 100644 src/test/isolation/specs/freeze-the-dead.spec diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index d03f544d26..c435482cd2 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -6405,14 +6405,23 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, Assert(TransactionIdIsValid(xid)); /* - * If the xid is older than the cutoff, it has to have aborted, - * otherwise the tuple would have gotten pruned away. + * The updating transaction cannot possibly be still running, but + * verify whether it has committed, and request to set the + * COMMITTED flag if so. (We normally don't see any tuples in + * this state, because they are removed by page pruning before we + * try to freeze the page; but this can happen if the updating + * transaction commits after the page is pruned but before + * HeapTupleSatisfiesVacuum). */ if (TransactionIdPrecedes(xid, cutoff_xid)) { - Assert(!TransactionIdDidCommit(xid)); - *flags |= FRM_INVALIDATE_XMAX; - xid = InvalidTransactionId; /* not strictly necessary */ + if (TransactionIdDidCommit(xid)) + *flags = FRM_MARK_COMMITTED | FRM_RETURN_IS_XID; + else + { + *flags |= FRM_INVALIDATE_XMAX; + xid = InvalidTransactionId; /* not strictly necessary */ + } } else { @@ -6485,13 +6494,16 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, /* * It's an update; should we keep it? If the transaction is known * aborted or crashed then it's okay to ignore it, otherwise not. - * Note that an updater older than cutoff_xid cannot possibly be - * committed, because HeapTupleSatisfiesVacuum would have returned - * HEAPTUPLE_DEAD and we would not be trying to freeze the tuple. * * As with all tuple visibility routines, it's critical to test * TransactionIdIsInProgress before TransactionIdDidCommit, * because of race conditions explained in detail in tqual.c. + * + * We normally don't see committed updating transactions earlier + * than the cutoff xid, because they are removed by page pruning + * before we try to freeze the page; but it can happen if the + * updating transaction commits after the page is pruned but + * before HeapTupleSatisfiesVacuum. */ if (TransactionIdIsCurrentTransactionId(xid) || TransactionIdIsInProgress(xid)) @@ -6516,13 +6528,6 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, * we can ignore it. */ - /* - * Since the tuple wasn't marked HEAPTUPLE_DEAD by vacuum, the - * update Xid cannot possibly be older than the xid cutoff. - */ - Assert(!TransactionIdIsValid(update_xid) || - !TransactionIdPrecedes(update_xid, cutoff_xid)); - /* * If we determined that it's an Xid corresponding to an update * that must be retained, additionally add it to the list of @@ -6601,7 +6606,10 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, * * It is assumed that the caller has checked the tuple with * HeapTupleSatisfiesVacuum() and determined that it is not HEAPTUPLE_DEAD - * (else we should be removing the tuple, not freezing it). + * (else we should be removing the tuple, not freezing it). However, note + * that we don't remove HOT tuples even if they are dead, and it'd be incorrect + * to freeze them (because that would make them visible), so we mark them as + * update-committed, and needing further freezing later on. * * NB: cutoff_xid *must* be <= the current global xmin, to ensure that any * XID older than it could neither be running nor seen as running by any @@ -6712,7 +6720,22 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid, else if (TransactionIdIsNormal(xid)) { if (TransactionIdPrecedes(xid, cutoff_xid)) - freeze_xmax = true; + { + /* + * Must freeze regular XIDs older than the cutoff. We must not + * freeze a HOT-updated tuple, though; doing so would bring it + * back to life. + */ + if (HeapTupleHeaderIsHotUpdated(tuple)) + { + frz->t_infomask |= HEAP_XMAX_COMMITTED; + totally_frozen = false; + changed = true; + /* must not freeze */ + } + else + freeze_xmax = true; + } else totally_frozen = false; } diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index 45b1859475..30b1c08c6c 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -2018,17 +2018,17 @@ lazy_record_dead_tuple(LVRelStats *vacrelstats, ItemPointer itemptr) { /* - * The array shouldn't overflow under normal behavior, but perhaps it - * could if we are given a really small maintenance_work_mem. In that - * case, just forget the last few tuples (we'll get 'em next time). + * The array must never overflow, since we rely on all deletable tuples + * being removed; inability to remove a tuple might cause an old XID to + * persist beyond the freeze limit, which could be disastrous later on. */ - if (vacrelstats->num_dead_tuples < vacrelstats->max_dead_tuples) - { - vacrelstats->dead_tuples[vacrelstats->num_dead_tuples] = *itemptr; - vacrelstats->num_dead_tuples++; - pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES, - vacrelstats->num_dead_tuples); - } + if (vacrelstats->num_dead_tuples >= vacrelstats->max_dead_tuples) + elog(ERROR, "dead tuple array overflow"); + + vacrelstats->dead_tuples[vacrelstats->num_dead_tuples] = *itemptr; + vacrelstats->num_dead_tuples++; + pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES, + vacrelstats->num_dead_tuples); } /* diff --git a/src/test/isolation/expected/freeze-the-dead.out b/src/test/isolation/expected/freeze-the-dead.out new file mode 100644 index 0000000000..dd045613f9 --- /dev/null +++ b/src/test/isolation/expected/freeze-the-dead.out @@ -0,0 +1,101 @@ +Parsed test spec with 2 sessions + +starting permutation: s1_update s1_commit s1_vacuum s2_key_share s2_commit +step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3; +step s1_commit: COMMIT; +step s1_vacuum: VACUUM FREEZE tab_freeze; +step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE; +id + +3 +step s2_commit: COMMIT; + +starting permutation: s1_update s1_commit s2_key_share s1_vacuum s2_commit +step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3; +step s1_commit: COMMIT; +step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE; +id + +3 +step s1_vacuum: VACUUM FREEZE tab_freeze; +step s2_commit: COMMIT; + +starting permutation: s1_update s1_commit s2_key_share s2_commit s1_vacuum +step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3; +step s1_commit: COMMIT; +step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE; +id + +3 +step s2_commit: COMMIT; +step s1_vacuum: VACUUM FREEZE tab_freeze; + +starting permutation: s1_update s2_key_share s1_commit s1_vacuum s2_commit +step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3; +step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE; +id + +3 +step s1_commit: COMMIT; +step s1_vacuum: VACUUM FREEZE tab_freeze; +step s2_commit: COMMIT; + +starting permutation: s1_update s2_key_share s1_commit s2_commit s1_vacuum +step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3; +step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE; +id + +3 +step s1_commit: COMMIT; +step s2_commit: COMMIT; +step s1_vacuum: VACUUM FREEZE tab_freeze; + +starting permutation: s1_update s2_key_share s2_commit s1_commit s1_vacuum +step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3; +step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE; +id + +3 +step s2_commit: COMMIT; +step s1_commit: COMMIT; +step s1_vacuum: VACUUM FREEZE tab_freeze; + +starting permutation: s2_key_share s1_update s1_commit s1_vacuum s2_commit +step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE; +id + +3 +step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3; +step s1_commit: COMMIT; +step s1_vacuum: VACUUM FREEZE tab_freeze; +step s2_commit: COMMIT; + +starting permutation: s2_key_share s1_update s1_commit s2_commit s1_vacuum +step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE; +id + +3 +step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3; +step s1_commit: COMMIT; +step s2_commit: COMMIT; +step s1_vacuum: VACUUM FREEZE tab_freeze; + +starting permutation: s2_key_share s1_update s2_commit s1_commit s1_vacuum +step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE; +id + +3 +step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3; +step s2_commit: COMMIT; +step s1_commit: COMMIT; +step s1_vacuum: VACUUM FREEZE tab_freeze; + +starting permutation: s2_key_share s2_commit s1_update s1_commit s1_vacuum +step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE; +id + +3 +step s2_commit: COMMIT; +step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3; +step s1_commit: COMMIT; +step s1_vacuum: VACUUM FREEZE tab_freeze; diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule index 32c965b2a0..7dad3c2316 100644 --- a/src/test/isolation/isolation_schedule +++ b/src/test/isolation/isolation_schedule @@ -44,6 +44,7 @@ test: update-locked-tuple test: propagate-lock-delete test: tuplelock-conflict test: tuplelock-update +test: freeze-the-dead test: nowait test: nowait-2 test: nowait-3 diff --git a/src/test/isolation/specs/freeze-the-dead.spec b/src/test/isolation/specs/freeze-the-dead.spec new file mode 100644 index 0000000000..3cd9965b2f --- /dev/null +++ b/src/test/isolation/specs/freeze-the-dead.spec @@ -0,0 +1,27 @@ +# Test for interactions of tuple freezing with dead, as well as recently-dead +# tuples using multixacts via FOR KEY SHARE. +setup +{ + CREATE TABLE tab_freeze ( + id int PRIMARY KEY, + name char(3), + x int); + INSERT INTO tab_freeze VALUES (1, '111', 0); + INSERT INTO tab_freeze VALUES (3, '333', 0); +} + +teardown +{ + DROP TABLE tab_freeze; +} + +session "s1" +setup { BEGIN; } +step "s1_update" { UPDATE tab_freeze SET x = x + 1 WHERE id = 3; } +step "s1_commit" { COMMIT; } +step "s1_vacuum" { VACUUM FREEZE tab_freeze; } + +session "s2" +setup { BEGIN; } +step "s2_key_share" { SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE; } +step "s2_commit" { COMMIT; } -- 2.40.0