1 /*-------------------------------------------------------------------------
4 * PostgreSQL transaction-commit-log manager
6 * This module replaces the old "pg_log" access code, which treated pg_log
7 * essentially like a relation, in that it went through the regular buffer
8 * manager. The problem with that was that there wasn't any good way to
9 * recycle storage space for transactions so old that they'll never be
10 * looked up again. Now we use specialized access code so that the commit
11 * log can be broken into relatively small, independent segments.
13 * XLOG interactions: this module generates an XLOG record whenever a new
14 * CLOG page is initialized to zeroes. Other writes of CLOG come from
15 * recording of transaction commit or abort in xact.c, which generates its
16 * own XLOG records for these events and will re-perform the status update
17 * on redo; so we need make no additional XLOG entry here. Also, the XLOG
18 * is guaranteed flushed through the XLOG commit record before we are called
19 * to log a commit, so the WAL rule "write xlog before data" is satisfied
20 * automatically for commits, and we don't really care for aborts. Therefore,
21 * we don't need to mark CLOG pages with LSN information; we have enough
22 * synchronization already.
24 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
25 * Portions Copyright (c) 1994, Regents of the University of California
27 * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.23 2004/08/23 23:22:44 tgl Exp $
29 *-------------------------------------------------------------------------
33 #include "access/clog.h"
34 #include "access/slru.h"
35 #include "postmaster/bgwriter.h"
39 * Defines for CLOG page sizes. A page is the same BLCKSZ as is used
40 * everywhere else in Postgres.
42 * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF,
43 * CLOG page numbering also wraps around at 0xFFFFFFFF/CLOG_XACTS_PER_PAGE,
44 * and CLOG segment numbering at 0xFFFFFFFF/CLOG_XACTS_PER_SEGMENT. We need
45 * take no explicit notice of that fact in this module, except when comparing
46 * segment and page numbers in TruncateCLOG (see CLOGPagePrecedes).
/*
 * Bit-packing layout of a CLOG page.  Each transaction status occupies
 * CLOG_BITS_PER_XACT bits, CLOG_XACTS_PER_BYTE statuses share one byte,
 * and one BLCKSZ-byte page therefore holds BLCKSZ * CLOG_XACTS_PER_BYTE
 * statuses.
 */
49 /* We need two bits per xact, so four xacts fit in a byte */
50 #define CLOG_BITS_PER_XACT 2
51 #define CLOG_XACTS_PER_BYTE 4
52 #define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE)
/* Mask selecting a single status field once it has been shifted down to bit 0 */
53 #define CLOG_XACT_BITMASK ((1 << CLOG_BITS_PER_XACT) - 1)
/* Number of the CLOG page that holds xid's status */
55 #define TransactionIdToPage(xid) ((xid) / (TransactionId) CLOG_XACTS_PER_PAGE)
/* Index of xid's status among all the statuses stored on its page */
56 #define TransactionIdToPgIndex(xid) ((xid) % (TransactionId) CLOG_XACTS_PER_PAGE)
/* Byte offset, within the page, of the byte that holds xid's status */
57 #define TransactionIdToByte(xid) (TransactionIdToPgIndex(xid) / CLOG_XACTS_PER_BYTE)
/*
 * Position (0 .. CLOG_XACTS_PER_BYTE-1) of xid's status within that byte;
 * users in this file multiply it by CLOG_BITS_PER_XACT to get a bit shift.
 */
58 #define TransactionIdToBIndex(xid) ((xid) % (TransactionId) CLOG_XACTS_PER_BYTE)
62 * Link to shared-memory data structures for CLOG control
/*
 * File-local SLRU control structure for CLOG.  ClogCtl is the pointer form
 * that the rest of this file passes to the SimpleLru* routines.
 */
64 static SlruCtlData ClogCtlData;
65 #define ClogCtl (&ClogCtlData)
/* Forward declarations of file-local helpers that are defined further down */
68 static int ZeroCLOGPage(int pageno, bool writeXlog);
69 static bool CLOGPagePrecedes(int page1, int page2);
70 static void WriteZeroPageXlogRec(int pageno);
74 * Record the final state of a transaction in the commit log.
76 * NB: this is a low-level routine and is NOT the preferred entry point
77 * for most uses; TransactionLogUpdate() in transam.c is the intended caller.
/*
 * TransactionIdSetStatus: store `status` into the CLOG status field of `xid`.
 *
 * Acquires CLogControlLock exclusively, obtains the buffer slot for the page
 * containing xid via SimpleLruReadPage, rewrites the status bits, flags the
 * slot SLRU_PAGE_DIRTY and releases the lock.
 *
 * NOTE(review): this listing omits several short source lines: the return
 * type, the braces, the declarations of slotno/byteptr/byteval, and the
 * statements at original lines 104 and 107.  Presumably those two load
 * *byteptr into byteval before the masking below and store byteval back
 * afterwards --- confirm against the complete file.
 */
80 TransactionIdSetStatus(TransactionId xid, XidStatus status)
/* Locate the status field: page number, byte within page, bit shift within byte */
82 int pageno = TransactionIdToPage(xid);
83 int byteno = TransactionIdToByte(xid);
84 int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
/* Only these three states may be recorded through this routine */
89 Assert(status == TRANSACTION_STATUS_COMMITTED ||
90 status == TRANSACTION_STATUS_ABORTED ||
91 status == TRANSACTION_STATUS_SUB_COMMITTED);
93 LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
/* Get the buffer slot for the page, then point at the byte holding xid's status */
95 slotno = SimpleLruReadPage(ClogCtl, pageno, xid);
96 byteptr = ClogCtl->shared->page_buffer[slotno] + byteno;
98 /* Current state should be 0, subcommitted or target state */
99 Assert(((*byteptr >> bshift) & CLOG_XACT_BITMASK) == 0 ||
100 ((*byteptr >> bshift) & CLOG_XACT_BITMASK) == TRANSACTION_STATUS_SUB_COMMITTED ||
101 ((*byteptr >> bshift) & CLOG_XACT_BITMASK) == status);
103 /* note this assumes exclusive access to the clog page */
/* Clear the old status bits for this xid ... */
105 byteval &= ~(((1 << CLOG_BITS_PER_XACT) - 1) << bshift);
/* ... and install the new status in their place */
106 byteval |= (status << bshift);
/* Flag the slot as modified */
109 ClogCtl->shared->page_status[slotno] = SLRU_PAGE_DIRTY;
111 LWLockRelease(CLogControlLock);
115 * Interrogate the state of a transaction in the commit log.
117 * NB: this is a low-level routine and is NOT the preferred entry point
118 * for most uses; TransactionLogFetch() in transam.c is the intended caller.
/*
 * TransactionIdGetStatus: fetch the CLOG status field of `xid`.
 *
 * Acquires CLogControlLock, obtains the buffer slot for the page containing
 * xid via SimpleLruReadPage, extracts the status bits into `status` and
 * releases the lock.
 *
 * NOTE(review): the return type, braces, local declarations
 * (slotno/byteptr/status) and the final return statement are not present in
 * this listing; presumably `status` is what gets returned --- confirm.
 */
121 TransactionIdGetStatus(TransactionId xid)
/* Locate the status field: page number, byte within page, bit shift within byte */
123 int pageno = TransactionIdToPage(xid);
124 int byteno = TransactionIdToByte(xid);
125 int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
/*
 * The lock is taken in exclusive mode even though this routine only reads
 * the status byte.  NOTE(review): presumably SimpleLruReadPage may need to
 * modify shared SLRU state --- confirm against slru.c.
 */
130 LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
132 slotno = SimpleLruReadPage(ClogCtl, pageno, xid);
133 byteptr = ClogCtl->shared->page_buffer[slotno] + byteno;
/* Shift the field down to bit 0 and mask off the neighbouring xacts' bits */
135 status = (*byteptr >> bshift) & CLOG_XACT_BITMASK;
137 LWLockRelease(CLogControlLock);
144 * Initialization of shared memory for CLOG
/*
 * NOTE(review): two routines appear to be run together here; their signature
 * lines and braces are missing from this listing (the embedded numbering
 * jumps from 150 to 156).  Original line 150 reports the shared-memory size
 * needed for CLOG by returning SimpleLruShmemSize().  Original lines 156-157
 * look like the body of the shared-memory initialization routine, which
 * another comment in this file calls CLOGShmemInit.
 */
150 return SimpleLruShmemSize();
/* Install the CLOG-specific page-ordering callback before calling SimpleLruInit */
156 ClogCtl->PagePrecedes = CLOGPagePrecedes;
/*
 * Initialize the SLRU control data using CLogControlLock as its lock.
 * NOTE(review): "pg_clog" is presumably the on-disk subdirectory and
 * "CLOG Ctl" a name for the shared structure --- confirm against SimpleLruInit.
 */
157 SimpleLruInit(ClogCtl, "CLOG Ctl", CLogControlLock, "pg_clog");
161 * This func must be called ONCE on system install. It creates
162 * the initial CLOG segment. (The CLOG directory is assumed to
163 * have been created by the initdb shell script, and CLOGShmemInit
164 * must have been called already.)
/*
 * Body of the install-time routine that creates the initial CLOG segment.
 * It zeroes page 0 without emitting an XLOG record (writeXlog is false),
 * then writes the page out, all under an exclusive CLogControlLock.
 *
 * NOTE(review): the signature line, braces and the declaration of slotno
 * are missing from this listing.
 */
171 LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
173 /* Create and zero the first page of the commit log */
174 slotno = ZeroCLOGPage(0, false);
176 /* Make sure it's written out */
177 SimpleLruWritePage(ClogCtl, slotno, NULL);
/* The write is expected to leave the slot marked clean */
178 Assert(ClogCtl->shared->page_status[slotno] == SLRU_PAGE_CLEAN);
180 LWLockRelease(CLogControlLock);
184 * Initialize (or reinitialize) a page of CLOG to zeroes.
185 * If writeXlog is TRUE, also emit an XLOG record saying we did this.
187 * The page is not actually written, just set up in shared memory.
188 * The slot number of the new page is returned.
190 * Control lock must be held at entry, and will be held at exit.
/*
 * ZeroCLOGPage: set up page `pageno` as an all-zeroes page via
 * SimpleLruZeroPage and, when requested, log that fact with
 * WriteZeroPageXlogRec.
 *
 * NOTE(review): the return type, braces, the declaration of slotno, the
 * statement guarding original line 200 and the return statement are missing
 * from this listing.  Presumably the XLOG call is conditional on writeXlog
 * and slotno is returned --- confirm against the complete file.
 */
193 ZeroCLOGPage(int pageno, bool writeXlog)
/* Slot number of the freshly zeroed page */
197 slotno = SimpleLruZeroPage(ClogCtl, pageno);
200 WriteZeroPageXlogRec(pageno);
206 * This must be called ONCE during postmaster or standalone-backend startup,
207 * after StartupXLOG has initialized ShmemVariableCache->nextXid.
/*
 * Body of the startup routine.  It derives the latest CLOG page number from
 * the next transaction ID to be assigned (ShmemVariableCache->nextXid) and
 * stores it in the shared SLRU state.
 *
 * NOTE(review): the signature line, braces and the comment delimiters
 * around original line 213 are missing from this listing.
 */
213 * Initialize our idea of the latest page number.
215 ClogCtl->shared->latest_page_number = TransactionIdToPage(ShmemVariableCache->nextXid);
219 * This must be called ONCE during postmaster or standalone-backend shutdown
/*
 * Body of the shutdown routine (signature line and braces are missing from
 * this listing).  NOTE(review): the second argument to SimpleLruFlush is
 * false here but true in the checkpoint path; presumably it tells
 * SimpleLruFlush whether the flush is part of a checkpoint --- confirm
 * against slru.c.
 */
224 /* Flush dirty CLOG pages to disk */
225 SimpleLruFlush(ClogCtl, false);
229 * Perform a checkpoint --- either during shutdown, or on-the-fly
/*
 * Body of the checkpoint routine (signature line and braces are missing
 * from this listing).  NOTE(review): the second argument to SimpleLruFlush
 * is true here but false in the shutdown path; presumably it tells
 * SimpleLruFlush that the flush is part of a checkpoint --- confirm against
 * slru.c.
 */
234 /* Flush dirty CLOG pages to disk */
235 SimpleLruFlush(ClogCtl, true);
240 * Make sure that CLOG has room for a newly-allocated XID.
242 * NB: this is called while holding XidGenLock. We want it to be very fast
243 * most of the time; even when it's not so fast, no actual I/O need happen
244 * unless we're forced to write out a dirty clog or xlog page to make room
245 * in shared memory.
/*
 * ExtendCLOG: when `newestXact` is the first XID of a CLOG page, zero that
 * page under an exclusive CLogControlLock and emit an XLOG record for it.
 *
 * NOTE(review): the return type, braces, the declaration of pageno and the
 * statement controlled by the `if` below (original line 258) are missing
 * from this listing; presumably that statement is an early return ---
 * confirm against the complete file.
 */
248 ExtendCLOG(TransactionId newestXact)
253 * No work except at first XID of a page. But beware: just after
254 * wraparound, the first XID of page zero is FirstNormalTransactionId.
/* True when newestXact is neither at a page boundary nor FirstNormalTransactionId */
256 if (TransactionIdToPgIndex(newestXact) != 0 &&
257 !TransactionIdEquals(newestXact, FirstNormalTransactionId))
260 pageno = TransactionIdToPage(newestXact);
262 LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
264 /* Zero the page and make an XLOG entry about it */
/* The returned slot number is not needed here */
265 ZeroCLOGPage(pageno, true);
267 LWLockRelease(CLogControlLock);
272 * Remove all CLOG segments before the one holding the passed transaction ID
274 * When this is called, we know that the database logically contains no
275 * reference to transaction IDs older than oldestXact. However, we must
276 * not truncate the CLOG until we have performed a checkpoint, to ensure
277 * that no such references remain on disk either; else a crash just after
278 * the truncation might leave us with a problem. Since CLOG segments hold
279 * a large number of transactions, the opportunity to actually remove a
280 * segment is fairly rare, and so it seems best not to do the checkpoint
281 * unless we have confirmed that there is a removable segment. Therefore
282 * we issue the checkpoint command here, not in higher-level code as might
283 * seem cleaner.
/*
 * TruncateCLOG: remove CLOG segments that precede the one holding
 * `oldestXact`.
 *
 * The order of operations is: scan for removable files, return if there are
 * none, request a checkpoint, and only then truncate.
 *
 * NOTE(review): the return type, braces and the declaration of cutoffPage
 * are missing from this listing.
 */
286 TruncateCLOG(TransactionId oldestXact)
291 * The cutoff point is the start of the segment containing oldestXact.
292 * We pass the *page* containing oldestXact to SimpleLruTruncate.
294 cutoffPage = TransactionIdToPage(oldestXact);
296 /* Check to see if there are any files that could be removed */
/* The third argument is false: presumably "report only, do not delete" --- confirm */
297 if (!SlruScanDirectory(ClogCtl, cutoffPage, false))
298 return; /* nothing to remove */
300 /* Perform a CHECKPOINT */
301 RequestCheckpoint(true);
303 /* Now we can remove the old CLOG segment(s) */
304 SimpleLruTruncate(ClogCtl, cutoffPage);
309 * Decide which of two CLOG page numbers is "older" for truncation purposes.
311 * We need to use comparison of TransactionIds here in order to do the right
312 * thing with wraparound XID arithmetic. However, if we are asked about
313 * page number zero, we don't want to hand InvalidTransactionId to
314 * TransactionIdPrecedes: it'll get weird about permanent xact IDs. So,
315 * offset both xids by FirstNormalTransactionId to avoid that.
/*
 * CLOGPagePrecedes: report whether CLOG page `page1` is older than `page2`.
 *
 * Each page number is converted to the first XID stored on that page, both
 * XIDs are offset by FirstNormalTransactionId, and the comparison is
 * delegated to TransactionIdPrecedes.  This routine is installed as
 * ClogCtl->PagePrecedes elsewhere in this file.
 *
 * NOTE(review): the return type, braces and the declarations of xid1/xid2
 * are missing from this listing.
 */
318 CLOGPagePrecedes(int page1, int page2)
/* First XID on page1, shifted by FirstNormalTransactionId */
323 xid1 = ((TransactionId) page1) * CLOG_XACTS_PER_PAGE;
324 xid1 += FirstNormalTransactionId;
/* Same conversion for page2 */
325 xid2 = ((TransactionId) page2) * CLOG_XACTS_PER_PAGE;
326 xid2 += FirstNormalTransactionId;
328 return TransactionIdPrecedes(xid1, xid2);
333 * Write a ZEROPAGE xlog record
335 * Note: xlog record is marked as outside transaction control, since we
336 * want it to be redone whether the invoking transaction commits or not.
337 * (Besides which, this is normally done just before entering a transaction.)
/*
 * WriteZeroPageXlogRec: insert a CLOG_ZEROPAGE XLOG record whose payload is
 * the page number `pageno`.
 *
 * NOTE(review): the return type, braces, the declaration of rdata and
 * original line 347 are missing from this listing; presumably line 347
 * initializes another field of rdata --- confirm against the complete file.
 */
340 WriteZeroPageXlogRec(int pageno)
/* The record refers to no buffer; its payload is the raw bytes of pageno */
344 rdata.buffer = InvalidBuffer;
345 rdata.data = (char *) (&pageno);
346 rdata.len = sizeof(int);
/* XLOG_NO_TRAN marks the record as outside transaction control; the result is deliberately ignored */
348 (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE | XLOG_NO_TRAN, &rdata);
352 * CLOG resource manager's routines
/*
 * clog_redo: replay a CLOG XLOG record.
 *
 * For a CLOG_ZEROPAGE record, the page number is copied out of the record
 * data, the page is re-zeroed without generating another XLOG record, and
 * it is written out immediately, all under an exclusive CLogControlLock.
 * `lsn` is not referenced in the lines visible here.
 *
 * NOTE(review): the return type, braces and the declarations of
 * pageno/slotno are missing from this listing.
 */
355 clog_redo(XLogRecPtr lsn, XLogRecord *record)
/* Clear the XLR_INFO_MASK bits, leaving the value compared against CLOG_ZEROPAGE */
357 uint8 info = record->xl_info & ~XLR_INFO_MASK;
359 if (info == CLOG_ZEROPAGE)
/* Copy the payload out with memcpy rather than dereferencing it as an int */
364 memcpy(&pageno, XLogRecGetData(record), sizeof(int));
366 LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
/* writeXlog is false: do not log again while replaying the log */
368 slotno = ZeroCLOGPage(pageno, false);
369 SimpleLruWritePage(ClogCtl, slotno, NULL);
/* The write is expected to leave the slot marked clean */
370 Assert(ClogCtl->shared->page_status[slotno] == SLRU_PAGE_CLEAN);
372 LWLockRelease(CLogControlLock);
/*
 * clog_undo: undo entry point of the CLOG resource manager.
 *
 * NOTE(review): only the signature survives in this listing; the return
 * type and body are not shown, so nothing can be said here about what,
 * if anything, it does.
 */
377 clog_undo(XLogRecPtr lsn, XLogRecord *record)
/*
 * clog_desc: append a textual description of a CLOG XLOG record to `buf`.
 *
 * buf     - NUL-terminated string that is appended to in place
 * xl_info - info byte of the record
 * rec     - record payload; for CLOG_ZEROPAGE it starts with an int page number
 *
 * NOTE(review): both sprintf and strcat write into buf without any length
 * limit, so the caller must supply a buffer with enough spare room.  The
 * return type, braces, the declaration of pageno and the line separating
 * the two branches (presumably an `else`) are missing from this listing.
 */
382 clog_desc(char *buf, uint8 xl_info, char *rec)
/* Clear the XLR_INFO_MASK bits, leaving the value compared against CLOG_ZEROPAGE */
384 uint8 info = xl_info & ~XLR_INFO_MASK;
386 if (info == CLOG_ZEROPAGE)
/* Copy the payload out with memcpy rather than dereferencing it as an int */
390 memcpy(&pageno, rec, sizeof(int));
/* Write at the current end of buf, i.e. append */
391 sprintf(buf + strlen(buf), "zeropage: %d", pageno);
/* Any other info value is reported as unknown */
394 strcat(buf, "UNKNOWN");