From 6655a7299d835dea9e8e0ba69cc5284611b96f29 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas
Date: Wed, 24 Jul 2019 20:24:07 +0300
Subject: [PATCH] Use full 64-bit XID for checking if a deleted GiST page is old enough.

Otherwise, after a deleted page gets even older, it becomes unrecyclable
again. B-tree has the same problem, and has had since time immemorial,
but let's at least fix this in GiST, where this is new.

Backpatch to v12, where GiST page deletion was introduced.

Reviewed-by: Andrey Borodin
Discussion: https://www.postgresql.org/message-id/835A15A5-F1B4-4446-A711-BF48357EB602%40yandex-team.ru
---
Reviewer notes (free-form commentary placed between the "---" line and the
diffstat; it is not part of the commit message or of the change itself):

 * gistRedoPageReuse() initialized nextFullXid from
   ReadNextFullTransactionId() at its declaration and then assigned it from
   a second, identical call before the value was ever read.  The duplicate
   assignment is removed in this copy.  To stay consistent, that hunk's
   header (+395,27 -> +395,26), the new-file start lines of the three later
   gistxlog.c hunks, and the diffstat counts were each reduced by one, so
   this file no longer matches the commit named in the "From" line byte for
   byte.
 * Line breaks and indentation were reconstructed from a copy of this patch
   whose whitespace had been collapsed.  Context lines may therefore need
   "git apply --ignore-whitespace" to match the target tree.

 src/backend/access/gist/gistutil.c     | 24 ++++++++++++--
 src/backend/access/gist/gistvacuum.c   |  7 ++--
 src/backend/access/gist/gistxlog.c     | 31 ++++++++++++++----
 src/backend/access/rmgrdesc/gistdesc.c | 11 ++++---
 src/backend/utils/time/snapmgr.c       | 30 +++++++++++++++++
 src/include/access/gist.h              | 45 +++++++++++++++++++++++---
 src/include/access/gist_private.h      |  4 +--
 src/include/access/gistxlog.h          |  6 ++--
 src/include/utils/snapmgr.h            |  3 ++
 9 files changed, 133 insertions(+), 28 deletions(-)

diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index 7d1b219bbc..97260201dc 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -882,9 +882,27 @@ gistNewBuffer(Relation r)
 bool
 gistPageRecyclable(Page page)
 {
-	return PageIsNew(page) ||
-		(GistPageIsDeleted(page) &&
-		 TransactionIdPrecedes(GistPageGetDeleteXid(page), RecentGlobalXmin));
+	if (PageIsNew(page))
+		return true;
+	if (GistPageIsDeleted(page))
+	{
+		/*
+		 * The page was deleted, but when? If it was just deleted, a scan
+		 * might have seen the downlink to it, and will read the page later.
+		 * As long as that can happen, we must keep the deleted page around as
+		 * a tombstone.
+		 *
+		 * Compare the deletion XID with RecentGlobalXmin. If deleteXid <
+		 * RecentGlobalXmin, then no scan that's still in progress could have
+		 * seen its downlink, and we can recycle it.
+		 */
+		FullTransactionId deletexid_full = GistPageGetDeleteXid(page);
+		FullTransactionId recentxmin_full = GetFullRecentGlobalXmin();
+
+		if (FullTransactionIdPrecedes(deletexid_full, recentxmin_full))
+			return true;
+	}
+	return false;
 }
 
 bytea *
diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c
index 4270226eee..bf754ea6d0 100644
--- a/src/backend/access/gist/gistvacuum.c
+++ b/src/backend/access/gist/gistvacuum.c
@@ -595,7 +595,7 @@ gistdeletepage(IndexVacuumInfo *info, GistBulkDeleteResult *stats,
 	ItemId		iid;
 	IndexTuple	idxtuple;
 	XLogRecPtr	recptr;
-	TransactionId txid;
+	FullTransactionId txid;
 
 	/*
 	 * Check that the leaf is still empty and deletable.
@@ -648,14 +648,13 @@ gistdeletepage(IndexVacuumInfo *info, GistBulkDeleteResult *stats,
 	 * currently in progress must have ended. (That's much more conservative
 	 * than needed, but let's keep it safe and simple.)
 	 */
-	txid = ReadNewTransactionId();
+	txid = ReadNextFullTransactionId();
 
 	START_CRIT_SECTION();
 
 	/* mark the page as deleted */
 	MarkBufferDirty(leafBuffer);
-	GistPageSetDeleteXid(leafPage, txid);
-	GistPageSetDeleted(leafPage);
+	GistPageSetDeleted(leafPage, txid);
 	stats->stats.pages_deleted++;
 
 	/* remove the downlink from the parent */
diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c
index 503db34d86..3b28f54646 100644
--- a/src/backend/access/gist/gistxlog.c
+++ b/src/backend/access/gist/gistxlog.c
@@ -356,8 +356,7 @@ gistRedoPageDelete(XLogReaderState *record)
 	{
 		Page		page = (Page) BufferGetPage(leafBuffer);
 
-		GistPageSetDeleteXid(page, xldata->deleteXid);
-		GistPageSetDeleted(page);
+		GistPageSetDeleted(page, xldata->deleteXid);
 
 		PageSetLSN(page, lsn);
 		MarkBufferDirty(leafBuffer);
@@ -396,8 +395,26 @@ gistRedoPageReuse(XLogReaderState *record)
 	 */
 	if (InHotStandby)
 	{
-		ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid,
-											xlrec->node);
+		FullTransactionId latestRemovedFullXid = xlrec->latestRemovedFullXid;
+		FullTransactionId nextFullXid = ReadNextFullTransactionId();
+		uint64		diff;
+
+		/*
+		 * ResolveRecoveryConflictWithSnapshot operates on 32-bit
+		 * TransactionIds, so truncate the logged FullTransactionId. If the
+		 * logged value is very old, so that XID wrap-around already happened
+		 * on it, there can't be any snapshots that still see it.
+		 */
+		diff = U64FromFullTransactionId(nextFullXid) -
+			U64FromFullTransactionId(latestRemovedFullXid);
+		if (diff < MaxTransactionId / 2)
+		{
+			TransactionId latestRemovedXid;
+
+			latestRemovedXid = XidFromFullTransactionId(latestRemovedFullXid);
+			ResolveRecoveryConflictWithSnapshot(latestRemovedXid,
+												xlrec->node);
+		}
 	}
 }
 
@@ -554,7 +571,7 @@ gistXLogSplit(bool page_is_leaf,
  * downlink from the parent page.
  */
 XLogRecPtr
-gistXLogPageDelete(Buffer buffer, TransactionId xid,
+gistXLogPageDelete(Buffer buffer, FullTransactionId xid,
 				   Buffer parentBuffer, OffsetNumber downlinkOffset)
 {
 	gistxlogPageDelete xlrec;
@@ -578,7 +595,7 @@ gistXLogPageDelete(Buffer buffer, TransactionId xid,
  * Write XLOG record about reuse of a deleted page.
  */
 void
-gistXLogPageReuse(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid)
+gistXLogPageReuse(Relation rel, BlockNumber blkno, FullTransactionId latestRemovedXid)
 {
 	gistxlogPageReuse xlrec_reuse;
 
@@ -591,7 +608,7 @@ gistXLogPageReuse(Relation rel, BlockNumber blkno, TransactionId latestRemovedXi
 	/* XLOG stuff */
 	xlrec_reuse.node = rel->rd_node;
 	xlrec_reuse.block = blkno;
-	xlrec_reuse.latestRemovedXid = latestRemovedXid;
+	xlrec_reuse.latestRemovedFullXid = latestRemovedXid;
 
 	XLogBeginInsert();
 	XLogRegisterData((char *) &xlrec_reuse, SizeOfGistxlogPageReuse);
diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c
index 767864b58e..eccb6fd942 100644
--- a/src/backend/access/rmgrdesc/gistdesc.c
+++ b/src/backend/access/rmgrdesc/gistdesc.c
@@ -26,10 +26,11 @@ out_gistxlogPageUpdate(StringInfo buf, gistxlogPageUpdate *xlrec)
 static void
 out_gistxlogPageReuse(StringInfo buf, gistxlogPageReuse *xlrec)
 {
-	appendStringInfo(buf, "rel %u/%u/%u; blk %u; latestRemovedXid %u",
+	appendStringInfo(buf, "rel %u/%u/%u; blk %u; latestRemovedXid %u:%u",
 					 xlrec->node.spcNode, xlrec->node.dbNode,
 					 xlrec->node.relNode, xlrec->block,
-					 xlrec->latestRemovedXid);
+					 EpochFromFullTransactionId(xlrec->latestRemovedFullXid),
+					 XidFromFullTransactionId(xlrec->latestRemovedFullXid));
 }
 
 static void
@@ -50,8 +51,10 @@ out_gistxlogPageSplit(StringInfo buf, gistxlogPageSplit *xlrec)
 static void
 out_gistxlogPageDelete(StringInfo buf, gistxlogPageDelete *xlrec)
 {
-	appendStringInfo(buf, "deleteXid %u; downlink %u",
-					 xlrec->deleteXid, xlrec->downlinkOffset);
+	appendStringInfo(buf, "deleteXid %u:%u; downlink %u",
+					 EpochFromFullTransactionId(xlrec->deleteXid),
+					 XidFromFullTransactionId(xlrec->deleteXid),
+					 xlrec->downlinkOffset);
 }
 
 void
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 6690d78137..40fe6ed3d3 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -956,6 +956,36 @@ xmin_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg)
 		return 0;
 }
 
+/*
+ * Get current RecentGlobalXmin value, as a FullTransactionId.
+ */
+FullTransactionId
+GetFullRecentGlobalXmin(void)
+{
+	FullTransactionId nextxid_full;
+	uint32		nextxid_epoch;
+	TransactionId nextxid_xid;
+	uint32		epoch;
+
+	Assert(TransactionIdIsNormal(RecentGlobalXmin));
+
+	/*
+	 * Compute the epoch from the next XID's epoch. This relies on the fact
+	 * that RecentGlobalXmin must be within the 2 billion XID horizon from the
+	 * next XID.
+	 */
+	nextxid_full = ReadNextFullTransactionId();
+	nextxid_epoch = EpochFromFullTransactionId(nextxid_full);
+	nextxid_xid = XidFromFullTransactionId(nextxid_full);
+
+	if (RecentGlobalXmin > nextxid_xid)
+		epoch = nextxid_epoch - 1;
+	else
+		epoch = nextxid_epoch;
+
+	return FullTransactionIdFromEpochAndXid(epoch, RecentGlobalXmin);
+}
+
 /*
  * SnapshotResetXmin
  *
diff --git a/src/include/access/gist.h b/src/include/access/gist.h
index 6902f4115b..8292956cc0 100644
--- a/src/include/access/gist.h
+++ b/src/include/access/gist.h
@@ -16,6 +16,7 @@
 #ifndef GIST_H
 #define GIST_H
 
+#include "access/transam.h"
 #include "access/xlog.h"
 #include "access/xlogdefs.h"
 #include "storage/block.h"
@@ -140,8 +141,6 @@ typedef struct GISTENTRY
 #define GIST_LEAF(entry) (GistPageIsLeaf((entry)->page))
 
 #define GistPageIsDeleted(page) ( GistPageGetOpaque(page)->flags & F_DELETED)
-#define GistPageSetDeleted(page) ( GistPageGetOpaque(page)->flags |= F_DELETED)
-#define GistPageSetNonDeleted(page) ( GistPageGetOpaque(page)->flags &= ~F_DELETED)
 
 #define GistTuplesDeleted(page) ( GistPageGetOpaque(page)->flags & F_TUPLES_DELETED)
 #define GistMarkTuplesDeleted(page) ( GistPageGetOpaque(page)->flags |= F_TUPLES_DELETED)
@@ -158,9 +157,45 @@ typedef struct GISTENTRY
 #define GistPageGetNSN(page) ( PageXLogRecPtrGet(GistPageGetOpaque(page)->nsn))
 #define GistPageSetNSN(page, val) ( PageXLogRecPtrSet(GistPageGetOpaque(page)->nsn, val))
 
-/* For deleted pages we store last xid which could see the page in scan */
-#define GistPageGetDeleteXid(page) ( ((PageHeader) (page))->pd_prune_xid )
-#define GistPageSetDeleteXid(page, val) ( ((PageHeader) (page))->pd_prune_xid = val)
+
+/*
+ * On a deleted page, we store this struct. A deleted page doesn't contain any
+ * tuples, so we don't use the normal page layout with line pointers. Instead,
+ * this struct is stored right after the standard page header. pd_lower points
+ * to the end of this struct. If we add fields to this struct in the future, we
+ * can distinguish the old and new formats by pd_lower.
+ */
+typedef struct GISTDeletedPageContents
+{
+	/* last xid which could see the page in a scan */
+	FullTransactionId deleteXid;
+} GISTDeletedPageContents;
+
+static inline void
+GistPageSetDeleted(Page page, FullTransactionId deletexid)
+{
+	Assert(PageIsEmpty(page));
+
+	GistPageGetOpaque(page)->flags |= F_DELETED;
+	((PageHeader) page)->pd_lower = MAXALIGN(SizeOfPageHeaderData) + sizeof(GISTDeletedPageContents);
+
+	((GISTDeletedPageContents *) PageGetContents(page))->deleteXid = deletexid;
+}
+
+static inline FullTransactionId
+GistPageGetDeleteXid(Page page)
+{
+	Assert(GistPageIsDeleted(page));
+
+	/* Is the deleteXid field present? */
+	if (((PageHeader) page)->pd_lower >= MAXALIGN(SizeOfPageHeaderData) +
+		offsetof(GISTDeletedPageContents, deleteXid) + sizeof(FullTransactionId))
+	{
+		return ((GISTDeletedPageContents *) PageGetContents(page))->deleteXid;
+	}
+	else
+		return FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId);
+}
 
 /*
  * Vector of GISTENTRY structs; user-defined methods union and picksplit
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index 9e3958398e..0488d01c9b 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -426,11 +426,11 @@ extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
 
 /* gistxlog.c */
 extern XLogRecPtr gistXLogPageDelete(Buffer buffer,
-									 TransactionId xid, Buffer parentBuffer,
+									 FullTransactionId xid, Buffer parentBuffer,
 									 OffsetNumber downlinkOffset);
 
 extern void gistXLogPageReuse(Relation rel, BlockNumber blkno,
-							  TransactionId latestRemovedXid);
+							  FullTransactionId latestRemovedXid);
 
 extern XLogRecPtr gistXLogUpdate(Buffer buffer,
 								 OffsetNumber *todelete, int ntodelete,
diff --git a/src/include/access/gistxlog.h b/src/include/access/gistxlog.h
index 969a5376b5..e44922d915 100644
--- a/src/include/access/gistxlog.h
+++ b/src/include/access/gistxlog.h
@@ -83,7 +83,7 @@ typedef struct gistxlogPageSplit
  */
 typedef struct gistxlogPageDelete
 {
-	TransactionId deleteXid;	/* last Xid which could see page in scan */
+	FullTransactionId deleteXid;	/* last Xid which could see page in scan */
 	OffsetNumber downlinkOffset;	/* Offset of downlink referencing this
 									 * page */
 } gistxlogPageDelete;
@@ -98,10 +98,10 @@ typedef struct gistxlogPageReuse
 {
 	RelFileNode node;
 	BlockNumber block;
-	TransactionId latestRemovedXid;
+	FullTransactionId latestRemovedFullXid;
 } gistxlogPageReuse;
 
-#define SizeOfGistxlogPageReuse (offsetof(gistxlogPageReuse, latestRemovedXid) + sizeof(TransactionId))
+#define SizeOfGistxlogPageReuse (offsetof(gistxlogPageReuse, latestRemovedFullXid) + sizeof(FullTransactionId))
 
 extern void gist_redo(XLogReaderState *record);
 extern void gist_desc(StringInfo buf, XLogReaderState *record);
diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h
index 58ae3b0c7a..6641ee510a 100644
--- a/src/include/utils/snapmgr.h
+++ b/src/include/utils/snapmgr.h
@@ -13,6 +13,7 @@
 #ifndef SNAPMGR_H
 #define SNAPMGR_H
 
+#include "access/transam.h"
 #include "fmgr.h"
 #include "utils/relcache.h"
 #include "utils/resowner.h"
@@ -122,6 +123,8 @@ extern void UnregisterSnapshot(Snapshot snapshot);
 extern Snapshot RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner);
 extern void UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner);
 
+extern FullTransactionId GetFullRecentGlobalXmin(void);
+
 extern void AtSubCommit_Snapshot(int level);
 extern void AtSubAbort_Snapshot(int level);
 extern void AtEOXact_Snapshot(bool isCommit, bool resetXmin);
-- 
2.40.0