/*-------------------------------------------------------------------------
*
* subtrans.c
- * PostgreSQL subtrans-log manager
+ * PostgreSQL subtransaction-log manager
*
- * The pg_subtrans manager is a pg_clog-like manager which stores the parent
+ * The pg_subtrans manager is a pg_xact-like manager that stores the parent
* transaction Id for each transaction. It is a fundamental part of the
* nested transactions implementation. A main transaction has a parent
* of InvalidTransactionId, and each subtransaction has its immediate parent.
* The tree can easily be walked from child to parent, but not in the
* opposite direction.
*
- * This code is mostly derived from clog.c.
+ * This code is based on clog.c, but the robustness requirements
+ * are completely different from pg_xact, because we only need to remember
+ * pg_subtrans information for currently-open transactions. Thus, there is
+ * no need to preserve data over a crash and restart.
*
- * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
+ * There are no XLOG interactions since we do not care about preserving
+ * data across crashes. During database startup, we simply force the
+ * currently-active page of SUBTRANS to zeroes.
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/backend/access/transam/subtrans.c,v 1.2 2004/08/22 02:41:57 tgl Exp $
+ * src/backend/access/transam/subtrans.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
-#include <fcntl.h>
-#include <dirent.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
#include "access/slru.h"
#include "access/subtrans.h"
-#include "miscadmin.h"
-#include "storage/lwlock.h"
-#include "utils/tqual.h"
+#include "access/transam.h"
+#include "pg_trace.h"
+#include "utils/snapmgr.h"
/*
- * Defines for SubTrans page and segment sizes. A page is the same BLCKSZ
- * as is used everywhere else in Postgres.
+ * Defines for SubTrans page sizes. A page is the same BLCKSZ as is used
+ * everywhere else in Postgres.
*
* Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF,
* SubTrans page numbering also wraps around at
* 0xFFFFFFFF/SUBTRANS_XACTS_PER_PAGE, and segment numbering at
- * 0xFFFFFFFF/SUBTRANS_XACTS_PER_PAGE/SLRU_SEGMENTS_PER_PAGE. We need take no
+ * 0xFFFFFFFF/SUBTRANS_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need take no
* explicit notice of that fact in this module, except when comparing segment
- * and page numbers in TruncateSubTrans (see SubTransPagePrecedes).
+ * and page numbers in TruncateSUBTRANS (see SubTransPagePrecedes) and zeroing
+ * them in StartupSUBTRANS.
*/
/* We need four bytes per xact */
/*
 * Position of xid's entry within its SUBTRANS page; callers in this file
 * add the result to a TransactionId pointer at the start of the page buffer.
 */
#define TransactionIdToEntry(xid) ((xid) % (TransactionId) SUBTRANS_XACTS_PER_PAGE)
-/*----------
- * Shared-memory data structures for SUBTRANS control
- *
- * XLOG interactions: this module generates an XLOG record whenever a new
- * SUBTRANS page is initialized to zeroes. Other writes of SUBTRANS come from
- * recording of transaction commit or abort in xact.c, which generates its
- * own XLOG records for these events and will re-perform the status update
- * on redo; so we need make no additional XLOG entry here. Also, the XLOG
- * is guaranteed flushed through the XLOG commit record before we are called
- * to log a commit, so the WAL rule "write xlog before data" is satisfied
- * automatically for commits, and we don't really care for aborts. Therefore,
- * we don't need to mark SUBTRANS pages with LSN information; we have enough
- * synchronization already.
- *----------
+/*
+ * Link to shared-memory data structures for SUBTRANS control
*/
-
-
static SlruCtlData SubTransCtlData;
/* SubTransCtl always designates the address of SubTransCtlData above. */
-static SlruCtl SubTransCtl = &SubTransCtlData;
+
+#define SubTransCtl (&SubTransCtlData)
/* Prototypes for helpers that are private to this file. */
-static int ZeroSUBTRANSPage(int pageno, bool writeXlog);
+static int ZeroSUBTRANSPage(int pageno);
static bool SubTransPagePrecedes(int page1, int page2);
-static void WriteZeroPageXlogRec(int pageno);
/*
{
int pageno = TransactionIdToPage(xid);
int entryno = TransactionIdToEntry(xid);
+ int slotno;
TransactionId *ptr;
- LWLockAcquire(SubTransCtl->ControlLock, LW_EXCLUSIVE);
-
- ptr = (TransactionId *) SimpleLruReadPage(SubTransCtl, pageno, xid, true);
- ptr += entryno;
+ Assert(TransactionIdIsValid(parent));
+ Assert(TransactionIdFollows(xid, parent));
- /* Current state should be 0 or target state */
- Assert(*ptr == InvalidTransactionId || *ptr == parent);
+ LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
- *ptr = parent;
+ slotno = SimpleLruReadPage(SubTransCtl, pageno, true, xid);
+ ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno];
+ ptr += entryno;
- /* ...->page_status[slotno] = SLRU_PAGE_DIRTY; already done */
+ /*
+ * It's possible we'll try to set the parent xid multiple times but we
+ * shouldn't ever be changing the xid from one valid xid to another valid
+ * xid, which would corrupt the data structure.
+ */
+ if (*ptr != parent)
+ {
+ Assert(*ptr == InvalidTransactionId);
+ *ptr = parent;
+ SubTransCtl->shared->page_dirty[slotno] = true;
+ }
- LWLockRelease(SubTransCtl->ControlLock);
+ LWLockRelease(SubtransControlLock);
}
/*
{
int pageno = TransactionIdToPage(xid);
int entryno = TransactionIdToEntry(xid);
+ int slotno;
TransactionId *ptr;
- TransactionId parent;
+ TransactionId parent;
/* Can't ask about stuff that might not be around anymore */
- Assert(TransactionIdFollowsOrEquals(xid, RecentXmin));
+ Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));
/* Bootstrap and frozen XIDs have no parent */
if (!TransactionIdIsNormal(xid))
return InvalidTransactionId;
- LWLockAcquire(SubTransCtl->ControlLock, LW_EXCLUSIVE);
+ /* lock is acquired by SimpleLruReadPage_ReadOnly */
- ptr = (TransactionId *) SimpleLruReadPage(SubTransCtl, pageno, xid, false);
+ slotno = SimpleLruReadPage_ReadOnly(SubTransCtl, pageno, xid);
+ ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno];
ptr += entryno;
parent = *ptr;
- LWLockRelease(SubTransCtl->ControlLock);
+ LWLockRelease(SubtransControlLock);
return parent;
}
*
* Returns the topmost transaction of the given transaction id.
*
- * Because we cannot look back further than RecentXmin, it is possible
+ * Because we cannot look back further than TransactionXmin, it is possible
* that this function will lie and return an intermediate subtransaction ID
* instead of the true topmost parent ID. This is OK, because in practice
* we only care about detecting whether the topmost parent is still running
* or is part of a current snapshot's list of still-running transactions.
- * Therefore, any XID before RecentXmin is as good as any other.
+ * Therefore, any XID before TransactionXmin is as good as any other.
*/
TransactionId
SubTransGetTopmostTransaction(TransactionId xid)
{
TransactionId parentXid = xid,
- previousXid = xid;
+ previousXid = xid;
/* Can't ask about stuff that might not be around anymore */
- Assert(TransactionIdFollowsOrEquals(xid, RecentXmin));
+ Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));
while (TransactionIdIsValid(parentXid))
{
previousXid = parentXid;
- if (TransactionIdPrecedes(parentXid, RecentXmin))
+ if (TransactionIdPrecedes(parentXid, TransactionXmin))
break;
parentXid = SubTransGetParent(parentXid);
+
+ /*
+ * By convention the parent xid gets allocated first, so should always
+ * precede the child xid. Anything else points to a corrupted data
+ * structure that could lead to an infinite loop, so exit.
+ */
+ if (!TransactionIdPrecedes(parentXid, previousXid))
+ elog(ERROR, "pg_subtrans contains invalid entry: xid %u points to parent xid %u",
+ previousXid, parentXid);
}
Assert(TransactionIdIsValid(previousXid));
/*
- * Initialization of shared memory for Subtrans
+ * Initialization of shared memory for SUBTRANS
*/
-
-int
+Size
SUBTRANSShmemSize(void)
{
/*
 * The whole request is delegated to SimpleLruShmemSize; nothing is added
 * on top of what it reports.
 * NOTE(review): the second argument (0) is presumably the count of LSN
 * slots per page, which this module does not use -- confirm in slru.h.
 */
- return SimpleLruShmemSize();
+ return SimpleLruShmemSize(NUM_SUBTRANS_BUFFERS, 0);
}
void
SUBTRANSShmemInit(void)
{
- SimpleLruInit(SubTransCtl, "SUBTRANS Ctl", "pg_subtrans");
/* Install this file's page-ordering callback on the SUBTRANS control struct. */
SubTransCtl->PagePrecedes = SubTransPagePrecedes;
+ SimpleLruInit(SubTransCtl, "subtrans", NUM_SUBTRANS_BUFFERS, 0,
+ SubtransControlLock, "pg_subtrans",
+ LWTRANCHE_SUBTRANS_BUFFERS);
+ /* Override default assumption that writes should be fsync'd */
+ SubTransCtl->do_fsync = false;
}
/*
* This func must be called ONCE on system install. It creates
- * the initial SubTrans segment. (The SubTrans directory is assumed to
- * have been created by initdb, and SubTransShmemInit must have been called
- * already.)
+ * the initial SUBTRANS segment. (The SUBTRANS directory is assumed to
+ * have been created by initdb, and SUBTRANSShmemInit must have been
+ * called already.)
+ *
+ * Note: it's not really necessary to create the initial segment now,
+ * since slru.c would create it on first write anyway. But we may as well
+ * do it to be sure the directory is set up correctly.
*/
void
BootStrapSUBTRANS(void)
{
/* Buffer slot that ZeroSUBTRANSPage hands back for page 0. */
int slotno;
/* The control lock is held across both the zeroing and the write below. */
- LWLockAcquire(SubTransCtl->ControlLock, LW_EXCLUSIVE);
+ LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
- /* Create and zero the first page of the commit log */
- slotno = ZeroSUBTRANSPage(0, false);
+ /* Create and zero the first page of the subtrans log */
+ slotno = ZeroSUBTRANSPage(0);
/* Make sure it's written out */
- SimpleLruWritePage(SubTransCtl, slotno, NULL);
- /* Assert(SubTransCtl->page_status[slotno] == SLRU_PAGE_CLEAN); */
+ SimpleLruWritePage(SubTransCtl, slotno);
+ Assert(!SubTransCtl->shared->page_dirty[slotno]);
- LWLockRelease(SubTransCtl->ControlLock);
+ LWLockRelease(SubtransControlLock);
}
/*
- * Initialize (or reinitialize) a page of SubTrans to zeroes.
- * If writeXlog is TRUE, also emit an XLOG record saying we did this.
+ * Initialize (or reinitialize) a page of SUBTRANS to zeroes.
*
* The page is not actually written, just set up in shared memory.
* The slot number of the new page is returned.
* Control lock must be held at entry, and will be held at exit.
*/
static int
-ZeroSUBTRANSPage(int pageno, bool writeXlog)
+ZeroSUBTRANSPage(int pageno)
{
/* The returned slot number is exactly what SimpleLruZeroPage reports for pageno. */
- int slotno = SimpleLruZeroPage(SubTransCtl, pageno);
-
- if (writeXlog)
- WriteZeroPageXlogRec(pageno);
-
- return slotno;
+ return SimpleLruZeroPage(SubTransCtl, pageno);
}
/*
* This must be called ONCE during postmaster or standalone-backend startup,
* after StartupXLOG has initialized ShmemVariableCache->nextXid.
+ *
+ * oldestActiveXID is the oldest XID of any prepared transaction, or nextXid
+ * if there are none.
*/
void
-StartupSUBTRANS(void)
+StartupSUBTRANS(TransactionId oldestActiveXID)
{
+ int startPage;
+ int endPage;
+
/*
- * Initialize our idea of the latest page number.
+ * Since we don't expect pg_subtrans to be valid across crashes, we
+ * initialize the currently-active page(s) to zeroes during startup.
+ * Whenever we advance into a new page, ExtendSUBTRANS will likewise zero
+ * the new page without regard to whatever was previously on disk.
*/
- SimpleLruSetLatestPage(SubTransCtl,
- TransactionIdToPage(ShmemVariableCache->nextXid));
+ LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
+
/*
 * Pages to clear run from the one holding oldestActiveXID through the one
 * holding nextXid, inclusive.
 */
+ startPage = TransactionIdToPage(oldestActiveXID);
+ endPage = TransactionIdToPage(ShmemVariableCache->nextXid);
+
/*
 * Compare with != rather than <: after the wraparound reset below,
 * startPage may be numerically greater than endPage while still being
 * logically behind it.
 */
+ while (startPage != endPage)
+ {
+ (void) ZeroSUBTRANSPage(startPage);
+ startPage++;
+ /* must account for wraparound */
+ if (startPage > TransactionIdToPage(MaxTransactionId))
+ startPage = 0;
+ }
/* The loop stops before endPage, so zero that last page here. */
+ (void) ZeroSUBTRANSPage(startPage);
+
+ LWLockRelease(SubtransControlLock);
}
/*
void
ShutdownSUBTRANS(void)
{
+ /*
+ * Flush dirty SUBTRANS pages to disk
+ *
+ * This is not actually necessary from a correctness point of view. We do
+ * it merely as a debugging aid.
+ */
/*
 * The same flag value (false) goes to the trace probes and to
 * SimpleLruFlush; CheckPointSUBTRANS uses true for all three.
 * NOTE(review): presumably the flag tells slru.c whether this flush is
 * part of a checkpoint -- confirm against SimpleLruFlush.
 */
+ TRACE_POSTGRESQL_SUBTRANS_CHECKPOINT_START(false);
SimpleLruFlush(SubTransCtl, false);
+ TRACE_POSTGRESQL_SUBTRANS_CHECKPOINT_DONE(false);
}
/*
void
CheckPointSUBTRANS(void)
{
+ /*
+ * Flush dirty SUBTRANS pages to disk
+ *
+ * This is not actually necessary from a correctness point of view. We do
+ * it merely to improve the odds that writing of dirty pages is done by
+ * the checkpoint process and not by backends.
+ */
/*
 * The same flag value (true) goes to the trace probes and to
 * SimpleLruFlush; ShutdownSUBTRANS uses false for all three.
 * NOTE(review): presumably the flag tells slru.c whether this flush is
 * part of a checkpoint -- confirm against SimpleLruFlush.
 */
+ TRACE_POSTGRESQL_SUBTRANS_CHECKPOINT_START(true);
SimpleLruFlush(SubTransCtl, true);
+ TRACE_POSTGRESQL_SUBTRANS_CHECKPOINT_DONE(true);
}
/*
- * Make sure that SubTrans has room for a newly-allocated XID.
+ * Make sure that SUBTRANS has room for a newly-allocated XID.
*
* NB: this is called while holding XidGenLock. We want it to be very fast
* most of the time; even when it's not so fast, no actual I/O need happen
- * unless we're forced to write out a dirty subtrans or xlog page to make room
+ * unless we're forced to write out a dirty subtrans page to make room
* in shared memory.
*/
void
pageno = TransactionIdToPage(newestXact);
- LWLockAcquire(SubTransCtl->ControlLock, LW_EXCLUSIVE);
+ LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
- /* Zero the page and make an XLOG entry about it */
- ZeroSUBTRANSPage(pageno, true);
+ /* Zero the page */
+ ZeroSUBTRANSPage(pageno);
- LWLockRelease(SubTransCtl->ControlLock);
+ LWLockRelease(SubtransControlLock);
}
/*
- * Remove all SubTrans segments before the one holding the passed transaction ID
+ * Remove all SUBTRANS segments before the one holding the passed transaction ID
*
- * When this is called, we know that the database logically contains no
- * reference to transaction IDs older than oldestXact. However, we must
- * not truncate the SubTrans until we have performed a checkpoint, to ensure
- * that no such references remain on disk either; else a crash just after
- * the truncation might leave us with a problem. Since SubTrans segments hold
- * a large number of transactions, the opportunity to actually remove a
- * segment is fairly rare, and so it seems best not to do the checkpoint
- * unless we have confirmed that there is a removable segment. Therefore
- * we issue the checkpoint command here, not in higher-level code as might
- * seem cleaner.
+ * This is normally called during checkpoint, with oldestXact being the
+ * oldest TransactionXmin of any running transaction.
*/
void
TruncateSUBTRANS(TransactionId oldestXact)
int cutoffPage;
/*
- * The cutoff point is the start of the segment containing oldestXact.
- * We pass the *page* containing oldestXact to SimpleLruTruncate.
+ * The cutoff point is the start of the segment containing oldestXact. We
+ * pass the *page* containing oldestXact to SimpleLruTruncate. We step
+ * back one transaction to avoid passing a cutoff page that hasn't been
+ * created yet in the rare case that oldestXact would be the first item on
+ * a page and oldestXact == next XID. In that case, if we didn't subtract
+ * one, we'd trigger SimpleLruTruncate's wraparound detection.
*/
+ TransactionIdRetreat(oldestXact);
cutoffPage = TransactionIdToPage(oldestXact);
+
SimpleLruTruncate(SubTransCtl, cutoffPage);
}
/*
- * Decide which of two SubTrans page numbers is "older" for truncation purposes.
+ * Decide which of two SUBTRANS page numbers is "older" for truncation purposes.
*
* We need to use comparison of TransactionIds here in order to do the right
* thing with wraparound XID arithmetic. However, if we are asked about
return TransactionIdPrecedes(xid1, xid2);
}
-
-
-/*
- * Write a ZEROPAGE xlog record
- *
- * Note: xlog record is marked as outside transaction control, since we
- * want it to be redone whether the invoking transaction commits or not.
- * (Besides which, this is normally done just before entering a transaction.)
- */
-static void
-WriteZeroPageXlogRec(int pageno)
-{
- XLogRecData rdata;
-
- rdata.buffer = InvalidBuffer;
- rdata.data = (char *) (&pageno);
- rdata.len = sizeof(int);
- rdata.next = NULL;
- (void) XLogInsert(RM_SLRU_ID, SUBTRANS_ZEROPAGE | XLOG_NO_TRAN, &rdata);
-}
-
-/* Redo a ZEROPAGE action during WAL replay */
-void
-subtrans_zeropage_redo(int pageno)
-{
- int slotno;
-
- LWLockAcquire(SubTransCtl->ControlLock, LW_EXCLUSIVE);
-
- slotno = ZeroSUBTRANSPage(pageno, false);
- SimpleLruWritePage(SubTransCtl, slotno, NULL);
- /* Assert(SubTransCtl->page_status[slotno] == SLRU_PAGE_CLEAN); */
-
- LWLockRelease(SubTransCtl->ControlLock);
-}