X-Git-Url: https://granicus.if.org/sourcecode?a=blobdiff_plain;f=src%2Fbackend%2Faccess%2Ftransam%2Fsubtrans.c;h=f6406611301f8f7bdb320665947ee45a264f3c86;hb=c7b8998ebbf310a156aa38022555a24d98fdbfb4;hp=ace1bb1434a868cf7a2429d58589cee5f7a497bb;hpb=f009c316ba49857249611bc0d578c518c449879e;p=postgresql diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c index ace1bb1434..f640661130 100644 --- a/src/backend/access/transam/subtrans.c +++ b/src/backend/access/transam/subtrans.c @@ -1,48 +1,51 @@ /*------------------------------------------------------------------------- * * subtrans.c - * PostgreSQL subtrans-log manager + * PostgreSQL subtransaction-log manager * - * The pg_subtrans manager is a pg_clog-like manager which stores the parent + * The pg_subtrans manager is a pg_xact-like manager that stores the parent * transaction Id for each transaction. It is a fundamental part of the * nested transactions implementation. A main transaction has a parent * of InvalidTransactionId, and each subtransaction has its immediate parent. * The tree can easily be walked from child to parent, but not in the * opposite direction. * - * This code is mostly derived from clog.c. + * This code is based on xact.c, but the robustness requirements + * are completely different from pg_xact, because we only need to remember + * pg_subtrans information for currently-open transactions. Thus, there is + * no need to preserve data over a crash and restart. * - * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group + * There are no XLOG interactions since we do not care about preserving + * data across crashes. During database startup, we simply force the + * currently-active page of SUBTRANS to zeroes. + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/subtrans.c,v 1.2 2004/08/22 02:41:57 tgl Exp $ + * src/backend/access/transam/subtrans.c * *------------------------------------------------------------------------- */ #include "postgres.h" -#include -#include -#include -#include - #include "access/slru.h" #include "access/subtrans.h" -#include "miscadmin.h" -#include "storage/lwlock.h" -#include "utils/tqual.h" +#include "access/transam.h" +#include "pg_trace.h" +#include "utils/snapmgr.h" /* - * Defines for SubTrans page and segment sizes. A page is the same BLCKSZ - * as is used everywhere else in Postgres. + * Defines for SubTrans page sizes. A page is the same BLCKSZ as is used + * everywhere else in Postgres. * * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF, * SubTrans page numbering also wraps around at * 0xFFFFFFFF/SUBTRANS_XACTS_PER_PAGE, and segment numbering at - * 0xFFFFFFFF/SUBTRANS_XACTS_PER_PAGE/SLRU_SEGMENTS_PER_PAGE. We need take no + * 0xFFFFFFFF/SUBTRANS_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need take no * explicit notice of that fact in this module, except when comparing segment - * and page numbers in TruncateSubTrans (see SubTransPagePrecedes). + * and page numbers in TruncateSUBTRANS (see SubTransPagePrecedes) and zeroing + * them in StartupSUBTRANS. */ /* We need four bytes per xact */ @@ -52,30 +55,16 @@ #define TransactionIdToEntry(xid) ((xid) % (TransactionId) SUBTRANS_XACTS_PER_PAGE) -/*---------- - * Shared-memory data structures for SUBTRANS control - * - * XLOG interactions: this module generates an XLOG record whenever a new - * SUBTRANS page is initialized to zeroes. Other writes of SUBTRANS come from - * recording of transaction commit or abort in xact.c, which generates its - * own XLOG records for these events and will re-perform the status update - * on redo; so we need make no additional XLOG entry here. Also, the XLOG - * is guaranteed flushed through the XLOG commit record before we are called - * to log a commit, so the WAL rule "write xlog before data" is satisfied - * automatically for commits, and we don't really care for aborts. Therefore, - * we don't need to mark SUBTRANS pages with LSN information; we have enough - * synchronization already. - *---------- +/* + * Link to shared-memory data structures for SUBTRANS control */ - - static SlruCtlData SubTransCtlData; -static SlruCtl SubTransCtl = &SubTransCtlData; + +#define SubTransCtl (&SubTransCtlData) -static int ZeroSUBTRANSPage(int pageno, bool writeXlog); +static int ZeroSUBTRANSPage(int pageno); static bool SubTransPagePrecedes(int page1, int page2); -static void WriteZeroPageXlogRec(int pageno); /* @@ -86,21 +75,31 @@ SubTransSetParent(TransactionId xid, TransactionId parent) { int pageno = TransactionIdToPage(xid); int entryno = TransactionIdToEntry(xid); + int slotno; TransactionId *ptr; - LWLockAcquire(SubTransCtl->ControlLock, LW_EXCLUSIVE); - - ptr = (TransactionId *) SimpleLruReadPage(SubTransCtl, pageno, xid, true); - ptr += entryno; + Assert(TransactionIdIsValid(parent)); + Assert(TransactionIdFollows(xid, parent)); - /* Current state should be 0 or target state */ - Assert(*ptr == InvalidTransactionId || *ptr == parent); + LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE); - *ptr = parent; + slotno = SimpleLruReadPage(SubTransCtl, pageno, true, xid); + ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno]; + ptr += entryno; - /* ...->page_status[slotno] = SLRU_PAGE_DIRTY; already done */ + /* + * It's possible we'll try to set the parent xid multiple times but we + * shouldn't ever be changing the xid from one valid xid to another valid + * xid, which would corrupt the data structure. + */ + if (*ptr != parent) + { + Assert(*ptr == InvalidTransactionId); + *ptr = parent; + SubTransCtl->shared->page_dirty[slotno] = true; + } - LWLockRelease(SubTransCtl->ControlLock); + LWLockRelease(SubtransControlLock); } /* @@ -111,24 +110,26 @@ SubTransGetParent(TransactionId xid) { int pageno = TransactionIdToPage(xid); int entryno = TransactionIdToEntry(xid); + int slotno; TransactionId *ptr; - TransactionId parent; + TransactionId parent; /* Can't ask about stuff that might not be around anymore */ - Assert(TransactionIdFollowsOrEquals(xid, RecentXmin)); + Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin)); /* Bootstrap and frozen XIDs have no parent */ if (!TransactionIdIsNormal(xid)) return InvalidTransactionId; - LWLockAcquire(SubTransCtl->ControlLock, LW_EXCLUSIVE); + /* lock is acquired by SimpleLruReadPage_ReadOnly */ - ptr = (TransactionId *) SimpleLruReadPage(SubTransCtl, pageno, xid, false); + slotno = SimpleLruReadPage_ReadOnly(SubTransCtl, pageno, xid); + ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno]; ptr += entryno; parent = *ptr; - LWLockRelease(SubTransCtl->ControlLock); + LWLockRelease(SubtransControlLock); return parent; } @@ -138,28 +139,37 @@ SubTransGetParent(TransactionId xid) * * Returns the topmost transaction of the given transaction id. * - * Because we cannot look back further than RecentXmin, it is possible + * Because we cannot look back further than TransactionXmin, it is possible * that this function will lie and return an intermediate subtransaction ID * instead of the true topmost parent ID. This is OK, because in practice * we only care about detecting whether the topmost parent is still running * or is part of a current snapshot's list of still-running transactions. - * Therefore, any XID before RecentXmin is as good as any other. + * Therefore, any XID before TransactionXmin is as good as any other. */ TransactionId SubTransGetTopmostTransaction(TransactionId xid) { TransactionId parentXid = xid, - previousXid = xid; + previousXid = xid; /* Can't ask about stuff that might not be around anymore */ - Assert(TransactionIdFollowsOrEquals(xid, RecentXmin)); + Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin)); while (TransactionIdIsValid(parentXid)) { previousXid = parentXid; - if (TransactionIdPrecedes(parentXid, RecentXmin)) + if (TransactionIdPrecedes(parentXid, TransactionXmin)) break; parentXid = SubTransGetParent(parentXid); + + /* + * By convention the parent xid gets allocated first, so should always + * precede the child xid. Anything else points to a corrupted data + * structure that could lead to an infinite loop, so exit. + */ + if (!TransactionIdPrecedes(parentXid, previousXid)) + elog(ERROR, "pg_subtrans contains invalid entry: xid %u points to parent xid %u", + previousXid, parentXid); } Assert(TransactionIdIsValid(previousXid)); @@ -169,48 +179,54 @@ SubTransGetTopmostTransaction(TransactionId xid) /* - * Initialization of shared memory for Subtrans + * Initialization of shared memory for SUBTRANS */ - -int +Size SUBTRANSShmemSize(void) { - return SimpleLruShmemSize(); + return SimpleLruShmemSize(NUM_SUBTRANS_BUFFERS, 0); } void SUBTRANSShmemInit(void) { - SimpleLruInit(SubTransCtl, "SUBTRANS Ctl", "pg_subtrans"); SubTransCtl->PagePrecedes = SubTransPagePrecedes; + SimpleLruInit(SubTransCtl, "subtrans", NUM_SUBTRANS_BUFFERS, 0, + SubtransControlLock, "pg_subtrans", + LWTRANCHE_SUBTRANS_BUFFERS); + /* Override default assumption that writes should be fsync'd */ + SubTransCtl->do_fsync = false; } /* * This func must be called ONCE on system install. It creates - * the initial SubTrans segment. (The SubTrans directory is assumed to - * have been created by initdb, and SubTransShmemInit must have been called - * already.) + * the initial SUBTRANS segment. (The SUBTRANS directory is assumed to + * have been created by the initdb shell script, and SUBTRANSShmemInit + * must have been called already.) + * + * Note: it's not really necessary to create the initial segment now, + * since slru.c would create it on first write anyway. But we may as well + * do it to be sure the directory is set up correctly. */ void BootStrapSUBTRANS(void) { int slotno; - LWLockAcquire(SubTransCtl->ControlLock, LW_EXCLUSIVE); + LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE); - /* Create and zero the first page of the commit log */ - slotno = ZeroSUBTRANSPage(0, false); + /* Create and zero the first page of the subtrans log */ + slotno = ZeroSUBTRANSPage(0); /* Make sure it's written out */ - SimpleLruWritePage(SubTransCtl, slotno, NULL); - /* Assert(SubTransCtl->page_status[slotno] == SLRU_PAGE_CLEAN); */ + SimpleLruWritePage(SubTransCtl, slotno); + Assert(!SubTransCtl->shared->page_dirty[slotno]); - LWLockRelease(SubTransCtl->ControlLock); + LWLockRelease(SubtransControlLock); } /* - * Initialize (or reinitialize) a page of SubTrans to zeroes. - * If writeXlog is TRUE, also emit an XLOG record saying we did this. + * Initialize (or reinitialize) a page of SUBTRANS to zeroes. * * The page is not actually written, just set up in shared memory. * The slot number of the new page is returned. @@ -218,28 +234,46 @@ BootStrapSUBTRANS(void) * Control lock must be held at entry, and will be held at exit. */ static int -ZeroSUBTRANSPage(int pageno, bool writeXlog) +ZeroSUBTRANSPage(int pageno) { - int slotno = SimpleLruZeroPage(SubTransCtl, pageno); - - if (writeXlog) - WriteZeroPageXlogRec(pageno); - - return slotno; + return SimpleLruZeroPage(SubTransCtl, pageno); } /* * This must be called ONCE during postmaster or standalone-backend startup, * after StartupXLOG has initialized ShmemVariableCache->nextXid. + * + * oldestActiveXID is the oldest XID of any prepared transaction, or nextXid + * if there are none. */ void -StartupSUBTRANS(void) +StartupSUBTRANS(TransactionId oldestActiveXID) { + int startPage; + int endPage; + /* - * Initialize our idea of the latest page number. + * Since we don't expect pg_subtrans to be valid across crashes, we + * initialize the currently-active page(s) to zeroes during startup. + * Whenever we advance into a new page, ExtendSUBTRANS will likewise zero + * the new page without regard to whatever was previously on disk. */ - SimpleLruSetLatestPage(SubTransCtl, - TransactionIdToPage(ShmemVariableCache->nextXid)); + LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE); + + startPage = TransactionIdToPage(oldestActiveXID); + endPage = TransactionIdToPage(ShmemVariableCache->nextXid); + + while (startPage != endPage) + { + (void) ZeroSUBTRANSPage(startPage); + startPage++; + /* must account for wraparound */ + if (startPage > TransactionIdToPage(MaxTransactionId)) + startPage = 0; + } + (void) ZeroSUBTRANSPage(startPage); + + LWLockRelease(SubtransControlLock); } /* @@ -248,7 +282,15 @@ StartupSUBTRANS(void) void ShutdownSUBTRANS(void) { + /* + * Flush dirty SUBTRANS pages to disk + * + * This is not actually necessary from a correctness point of view. We do + * it merely as a debugging aid. + */ + TRACE_POSTGRESQL_SUBTRANS_CHECKPOINT_START(false); SimpleLruFlush(SubTransCtl, false); + TRACE_POSTGRESQL_SUBTRANS_CHECKPOINT_DONE(false); } /* @@ -257,16 +299,25 @@ ShutdownSUBTRANS(void) void CheckPointSUBTRANS(void) { + /* + * Flush dirty SUBTRANS pages to disk + * + * This is not actually necessary from a correctness point of view. We do + * it merely to improve the odds that writing of dirty pages is done by + * the checkpoint process and not by backends. + */ + TRACE_POSTGRESQL_SUBTRANS_CHECKPOINT_START(true); SimpleLruFlush(SubTransCtl, true); + TRACE_POSTGRESQL_SUBTRANS_CHECKPOINT_DONE(true); } /* - * Make sure that SubTrans has room for a newly-allocated XID. + * Make sure that SUBTRANS has room for a newly-allocated XID. * * NB: this is called while holding XidGenLock. We want it to be very fast * most of the time; even when it's not so fast, no actual I/O need happen - * unless we're forced to write out a dirty subtrans or xlog page to make room + * unless we're forced to write out a dirty subtrans page to make room * in shared memory. */ void @@ -284,28 +335,20 @@ ExtendSUBTRANS(TransactionId newestXact) pageno = TransactionIdToPage(newestXact); - LWLockAcquire(SubTransCtl->ControlLock, LW_EXCLUSIVE); + LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE); - /* Zero the page and make an XLOG entry about it */ - ZeroSUBTRANSPage(pageno, true); + /* Zero the page */ + ZeroSUBTRANSPage(pageno); - LWLockRelease(SubTransCtl->ControlLock); + LWLockRelease(SubtransControlLock); } /* - * Remove all SubTrans segments before the one holding the passed transaction ID + * Remove all SUBTRANS segments before the one holding the passed transaction ID * - * When this is called, we know that the database logically contains no - * reference to transaction IDs older than oldestXact. However, we must - * not truncate the SubTrans until we have performed a checkpoint, to ensure - * that no such references remain on disk either; else a crash just after - * the truncation might leave us with a problem. Since SubTrans segments hold - * a large number of transactions, the opportunity to actually remove a - * segment is fairly rare, and so it seems best not to do the checkpoint - * unless we have confirmed that there is a removable segment. Therefore - * we issue the checkpoint command here, not in higher-level code as might - * seem cleaner. + * This is normally called during checkpoint, with oldestXact being the + * oldest TransactionXmin of any running transaction. */ void TruncateSUBTRANS(TransactionId oldestXact) @@ -313,16 +356,22 @@ TruncateSUBTRANS(TransactionId oldestXact) int cutoffPage; /* - * The cutoff point is the start of the segment containing oldestXact. - * We pass the *page* containing oldestXact to SimpleLruTruncate. + * The cutoff point is the start of the segment containing oldestXact. We + * pass the *page* containing oldestXact to SimpleLruTruncate. We step + * back one transaction to avoid passing a cutoff page that hasn't been + * created yet in the rare case that oldestXact would be the first item on + * a page and oldestXact == next XID. In that case, if we didn't subtract + * one, we'd trigger SimpleLruTruncate's wraparound detection. */ + TransactionIdRetreat(oldestXact); cutoffPage = TransactionIdToPage(oldestXact); + SimpleLruTruncate(SubTransCtl, cutoffPage); } /* - * Decide which of two SubTrans page numbers is "older" for truncation purposes. + * Decide which of two SUBTRANS page numbers is "older" for truncation purposes. * * We need to use comparison of TransactionIds here in order to do the right * thing with wraparound XID arithmetic. However, if we are asked about @@ -343,38 +392,3 @@ SubTransPagePrecedes(int page1, int page2) return TransactionIdPrecedes(xid1, xid2); } - - -/* - * Write a ZEROPAGE xlog record - * - * Note: xlog record is marked as outside transaction control, since we - * want it to be redone whether the invoking transaction commits or not. - * (Besides which, this is normally done just before entering a transaction.) - */ -static void -WriteZeroPageXlogRec(int pageno) -{ - XLogRecData rdata; - - rdata.buffer = InvalidBuffer; - rdata.data = (char *) (&pageno); - rdata.len = sizeof(int); - rdata.next = NULL; - (void) XLogInsert(RM_SLRU_ID, SUBTRANS_ZEROPAGE | XLOG_NO_TRAN, &rdata); -} - -/* Redo a ZEROPAGE action during WAL replay */ -void -subtrans_zeropage_redo(int pageno) -{ - int slotno; - - LWLockAcquire(SubTransCtl->ControlLock, LW_EXCLUSIVE); - - slotno = ZeroSUBTRANSPage(pageno, false); - SimpleLruWritePage(SubTransCtl, slotno, NULL); - /* Assert(SubTransCtl->page_status[slotno] == SLRU_PAGE_CLEAN); */ - - LWLockRelease(SubTransCtl->ControlLock); -}