Size freespace;
bool all_visible_according_to_vm = false;
bool all_visible;
+ bool has_dead_tuples;
/*
* Skip pages that don't require vacuuming according to the visibility
* requiring freezing.
*/
all_visible = true;
+ has_dead_tuples = false;
nfrozen = 0;
hastup = false;
prev_dead_count = vacrelstats->num_dead_tuples;
HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data,
&vacrelstats->latestRemovedXid);
tups_vacuumed += 1;
+ has_dead_tuples = true;
}
else
{
PageSetAllVisible(page);
SetBufferCommitInfoNeedsSave(buf);
}
- else if (PageIsAllVisible(page) && !all_visible)
+ /*
+ * It's possible for the value returned by GetOldestXmin() to move
+ * backwards, so it's not wrong for us to see tuples that appear to
+ * not be visible to everyone yet, while PD_ALL_VISIBLE is already
+ * set. The real safe xmin value never moves backwards, but
+ * GetOldestXmin() is conservative and sometimes returns a value
+ * that's unnecessarily small, so if we see that contradiction it
+ * just means that the tuples that we think are not visible to
+ * everyone yet actually are, and the PD_ALL_VISIBLE flag is correct.
+ *
+ * There should never be dead tuples on a page with PD_ALL_VISIBLE
+ * set, however.
+ */
+ else if (PageIsAllVisible(page) && has_dead_tuples)
{
- elog(WARNING, "PD_ALL_VISIBLE flag was incorrectly set in relation \"%s\" page %u",
+ elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u",
relname, blkno);
PageClearAllVisible(page);
SetBufferCommitInfoNeedsSave(buf);
* This is also used to determine where to truncate pg_subtrans. allDbs
* must be TRUE for that case, and ignoreVacuum FALSE.
*
+ * Note: it's possible for the calculated value to move backwards on repeated
+ * calls. The calculated value is conservative, so that anything older is
+ * definitely not considered as running by anyone anymore, but the exact
+ * value calculated depends on a number of things. For example, if allDbs is
+ * TRUE and there are no transactions running in the current database,
+ * GetOldestXmin() returns latestCompletedXid. If a transaction begins after
+ * that, its xmin will include in-progress transactions in other databases
+ * that started earlier, so another call will return an lower value. The
+ * return value is also adjusted with vacuum_defer_cleanup_age, so increasing
+ * that setting on the fly is an easy way to have GetOldestXmin() move
+ * backwards.
+ *
* Note: we include all currently running xids in the set of considered xids.
* This ensures that if a just-started xact has not yet set its snapshot,
* when it does set the snapshot it cannot set xmin less than what we compute.