From: Tom Lane Date: Sun, 16 Sep 2007 02:38:02 +0000 (+0000) Subject: Fix aboriginal mistake in lazy VACUUM's code for truncating away X-Git-Tag: REL8_1_10~1 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=be706260f0191f596a3b92baac7791acaa0957bf;p=postgresql Fix aboriginal mistake in lazy VACUUM's code for truncating away no-longer-needed pages at the end of a table. We thought we could throw away pages containing HEAPTUPLE_DEAD tuples; but this is not so, because such tuples very likely have index entries pointing at them, and we wouldn't have removed the index entries. The problem only emerges in a somewhat unlikely race condition: the dead tuples have to have been inserted by a transaction that later aborted, and this has to have happened between VACUUM's initial scan of the page and then rechecking it for empty in count_nondeletable_pages. But that timespan will include an index-cleaning pass, so it's not all that hard to hit. This seems to explain a couple of previously unsolved bug reports. --- diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index e6f7e654e2..a6d6e8929d 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -31,7 +31,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.61.2.4 2007/09/12 02:05:55 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.61.2.5 2007/09/16 02:38:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -779,9 +779,9 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats) /* * Scan backwards from the end to verify that the end pages actually - * contain nothing we need to keep. This is *necessary*, not optional, - * because other backends could have added tuples to these pages whilst we - * were vacuuming. + * contain no tuples. This is *necessary*, not optional, because other + * backends could have added tuples to these pages whilst we were + * vacuuming. */ new_rel_pages = count_nondeletable_pages(onerel, vacrelstats); @@ -833,7 +833,7 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats) } /* - * Rescan end pages to verify that they are (still) empty of needed tuples. + * Rescan end pages to verify that they are (still) empty of tuples. * * Returns number of nondeletable pages (last nonempty page + 1). */ @@ -841,7 +841,6 @@ static BlockNumber count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats) { BlockNumber blkno; - HeapTupleData tuple; /* Strange coding of loop control is needed because blkno is unsigned */ blkno = vacrelstats->rel_pages; @@ -851,8 +850,7 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats) Page page; OffsetNumber offnum, maxoff; - bool tupgone, - hastup; + bool hastup; /* * We don't insert a vacuum delay point here, because we have an @@ -889,43 +887,13 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats) itemid = PageGetItemId(page, offnum); - if (!ItemIdIsUsed(itemid)) - continue; - - tuple.t_datamcxt = NULL; - tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); - tuple.t_len = ItemIdGetLength(itemid); - ItemPointerSet(&(tuple.t_self), blkno, offnum); - - tupgone = false; - - switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf)) - { - case HEAPTUPLE_DEAD: - tupgone = true; /* we can delete the tuple */ - break; - case HEAPTUPLE_LIVE: - /* Shouldn't be necessary to re-freeze anything */ - break; - case HEAPTUPLE_RECENTLY_DEAD: - - /* - * If tuple is recently deleted then we must not remove it - * from relation. - */ - break; - case HEAPTUPLE_INSERT_IN_PROGRESS: - /* This is an expected case during concurrent vacuum */ - break; - case HEAPTUPLE_DELETE_IN_PROGRESS: - /* This is an expected case during concurrent vacuum */ - break; - default: - elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); - break; - } - - if (!tupgone) + /* + * Note: any non-unused item should be taken as a reason to keep + * this page. We formerly thought that DEAD tuples could be + * thrown away, but that's not so, because we'd not have cleaned + * out their index entries. + */ + if (ItemIdIsUsed(itemid)) { hastup = true; break; /* can stop scanning */ @@ -943,7 +911,7 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats) /* * If we fall out of the loop, all the previously-thought-to-be-empty - * pages really are; we need not bother to look at the last known-nonempty + * pages still are; we need not bother to look at the last known-nonempty * page. */ return vacrelstats->nonempty_pages;