From 2042b3428d3947987b27dbd4598fd9a5716ec9e8 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 21 Jul 2004 22:31:26 +0000 Subject: [PATCH] Invent WAL timelines, as per recent discussion, to make point-in-time recovery more manageable. Also, undo recent change to add FILE_HEADER and WASTED_SPACE records to XLOG; instead make the XLOG page header variable-size with extra fields in the first page of an XLOG file. This should fix the boundary-case bugs observed by Mark Kirkwood. initdb forced due to change of XLOG representation. --- doc/src/sgml/page.sgml | 19 +- src/backend/access/heap/heapam.c | 24 +- src/backend/access/nbtree/nbtinsert.c | 20 +- src/backend/access/nbtree/nbtpage.c | 20 +- src/backend/access/nbtree/nbtsort.c | 8 +- src/backend/access/nbtree/nbtxlog.c | 24 +- .../access/transam/recovery.conf.sample | 8 + src/backend/access/transam/rmgr.c | 6 +- src/backend/access/transam/slru.c | 3 +- src/backend/access/transam/xlog.c | 1456 +++++++++++------ src/backend/access/transam/xlogutils.c | 156 +- src/backend/commands/sequence.c | 12 +- src/backend/commands/tablecmds.c | 4 +- src/backend/commands/vacuum.c | 14 +- src/backend/commands/vacuumlazy.c | 4 +- src/backend/postmaster/pgarch.c | 36 +- src/backend/storage/lmgr/deadlock.c | 3 +- src/bin/pg_controldata/pg_controldata.c | 4 +- src/bin/pg_resetxlog/pg_resetxlog.c | 88 +- src/include/access/xact.h | 3 +- src/include/access/xlog.h | 108 +- src/include/access/xlog_internal.h | 224 +++ src/include/access/xlogdefs.h | 22 +- src/include/access/xlogutils.h | 6 +- src/include/catalog/pg_control.h | 20 +- src/include/storage/bufpage.h | 30 +- 26 files changed, 1331 insertions(+), 991 deletions(-) create mode 100644 src/include/access/xlog_internal.h diff --git a/doc/src/sgml/page.sgml b/doc/src/sgml/page.sgml index ee619093a3..ebafa46598 100644 --- a/doc/src/sgml/page.sgml +++ b/doc/src/sgml/page.sgml @@ -1,5 +1,5 @@ @@ -114,37 +114,38 @@ data. Empty in ordinary tables. pd_lsn XLogRecPtr 8 bytes - LSN: next byte after last byte of xlog + LSN: next byte after last byte of xlog record for last change + to this page - pd_sui - StartUpID + pd_tli + TimeLineID 4 bytes - SUI of last changes (currently it's used by heap AM only) + TLI of last change pd_lower LocationIndex 2 bytes - Offset to start of free space. + Offset to start of free space pd_upper LocationIndex 2 bytes - Offset to end of free space. + Offset to end of free space pd_special LocationIndex 2 bytes - Offset to start of special space. + Offset to start of special space pd_pagesize_version uint16 2 bytes - Page size and layout version number information. + Page size and layout version number information diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index f0be8123f5..6e65966c93 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.170 2004/07/11 18:01:44 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.171 2004/07/21 22:31:19 tgl Exp $ * * * INTERFACE ROUTINES @@ -1214,7 +1214,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid) recptr = XLogInsert(RM_HEAP_ID, info, rdata); PageSetLSN(page, recptr); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); } else { @@ -1390,7 +1390,7 @@ l1: recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE, rdata); PageSetLSN(dp, recptr); - PageSetSUI(dp, ThisStartUpID); + PageSetTLI(dp, ThisTimeLineID); } else { @@ -1748,10 +1748,10 @@ l2: if (newbuf != buffer) { PageSetLSN(BufferGetPage(newbuf), recptr); - PageSetSUI(BufferGetPage(newbuf), ThisStartUpID); + PageSetTLI(BufferGetPage(newbuf), ThisTimeLineID); } PageSetLSN(BufferGetPage(buffer), recptr); - PageSetSUI(BufferGetPage(buffer), ThisStartUpID); + PageSetTLI(BufferGetPage(buffer), ThisTimeLineID); } else { @@ -1902,7 +1902,7 @@ l3: * XLOG stuff: no logging is required as long as we have no * savepoints. For savepoints private log could be used... */ - PageSetSUI(BufferGetPage(*buffer), ThisStartUpID); + PageSetTLI(BufferGetPage(*buffer), ThisTimeLineID); /* store transaction information of xact marking the tuple */ tuple->t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED | @@ -2184,7 +2184,7 @@ heap_xlog_clean(bool redo, XLogRecPtr lsn, XLogRecord *record) PageRepairFragmentation(page, NULL); PageSetLSN(page, lsn); - PageSetSUI(page, ThisStartUpID); /* prev sui */ + PageSetTLI(page, ThisTimeLineID); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); WriteBuffer(buffer); } @@ -2217,7 +2217,7 @@ heap_xlog_newpage(bool redo, XLogRecPtr lsn, XLogRecord *record) memcpy(page, (char *) xlrec + SizeOfHeapNewpage, BLCKSZ); PageSetLSN(page, lsn); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); WriteBuffer(buffer); } @@ -2283,7 +2283,7 @@ heap_xlog_delete(bool redo, XLogRecPtr lsn, XLogRecord *record) /* Make sure there is no forward chain link in t_ctid */ htup->t_ctid = xlrec->target.tid; PageSetLSN(page, lsn); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); WriteBuffer(buffer); return; @@ -2368,7 +2368,7 @@ heap_xlog_insert(bool redo, XLogRecPtr lsn, XLogRecord *record) if (offnum == InvalidOffsetNumber) elog(PANIC, "heap_insert_redo: failed to add tuple"); PageSetLSN(page, lsn); - PageSetSUI(page, ThisStartUpID); /* prev sui */ + PageSetTLI(page, ThisTimeLineID); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); WriteBuffer(buffer); return; @@ -2466,7 +2466,7 @@ heap_xlog_update(bool redo, XLogRecPtr lsn, XLogRecord *record, bool move) if (samepage) goto newsame; PageSetLSN(page, lsn); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); WriteBuffer(buffer); goto newt; @@ -2564,7 +2564,7 @@ newsame:; if (offnum == InvalidOffsetNumber) elog(PANIC, "heap_update_redo: failed to add tuple"); PageSetLSN(page, lsn); - PageSetSUI(page, ThisStartUpID); /* prev sui */ + PageSetTLI(page, ThisTimeLineID); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); WriteBuffer(buffer); return; diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index 6a4ecaeb0c..ed08d65d99 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.112 2004/04/21 18:24:25 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.113 2004/07/21 22:31:19 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -621,11 +621,11 @@ _bt_insertonpg(Relation rel, if (BufferIsValid(metabuf)) { PageSetLSN(metapg, recptr); - PageSetSUI(metapg, ThisStartUpID); + PageSetTLI(metapg, ThisTimeLineID); } PageSetLSN(page, recptr); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); } END_CRIT_SECTION(); @@ -903,13 +903,13 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata); PageSetLSN(leftpage, recptr); - PageSetSUI(leftpage, ThisStartUpID); + PageSetTLI(leftpage, ThisTimeLineID); PageSetLSN(rightpage, recptr); - PageSetSUI(rightpage, ThisStartUpID); + PageSetTLI(rightpage, ThisTimeLineID); if (!P_RIGHTMOST(ropaque)) { PageSetLSN(spage, recptr); - PageSetSUI(spage, ThisStartUpID); + PageSetTLI(spage, ThisTimeLineID); } } @@ -1494,13 +1494,13 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, rdata); PageSetLSN(rootpage, recptr); - PageSetSUI(rootpage, ThisStartUpID); + PageSetTLI(rootpage, ThisTimeLineID); PageSetLSN(metapg, recptr); - PageSetSUI(metapg, ThisStartUpID); + PageSetTLI(metapg, ThisTimeLineID); PageSetLSN(lpage, recptr); - PageSetSUI(lpage, ThisStartUpID); + PageSetTLI(lpage, ThisTimeLineID); PageSetLSN(rpage, recptr); - PageSetSUI(rpage, ThisStartUpID); + PageSetTLI(rpage, ThisTimeLineID); } END_CRIT_SECTION(); diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 002fb018db..c8f482545e 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.76 2004/06/02 17:28:17 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.77 2004/07/21 22:31:20 tgl Exp $ * * NOTES * Postgres btree pages look like ordinary relation pages. The opaque @@ -84,7 +84,7 @@ _bt_metapinit(Relation rel) rdata); PageSetLSN(pg, recptr); - PageSetSUI(pg, ThisStartUpID); + PageSetTLI(pg, ThisTimeLineID); } END_CRIT_SECTION(); @@ -249,9 +249,9 @@ _bt_getroot(Relation rel, int access) recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, &rdata); PageSetLSN(rootpage, recptr); - PageSetSUI(rootpage, ThisStartUpID); + PageSetTLI(rootpage, ThisTimeLineID); PageSetLSN(metapg, recptr); - PageSetSUI(metapg, ThisStartUpID); + PageSetTLI(metapg, ThisTimeLineID); } END_CRIT_SECTION(); @@ -686,7 +686,7 @@ _bt_delitems(Relation rel, Buffer buf, recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE, rdata); PageSetLSN(page, recptr); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); } END_CRIT_SECTION(); @@ -1080,22 +1080,22 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) if (BufferIsValid(metabuf)) { PageSetLSN(metapg, recptr); - PageSetSUI(metapg, ThisStartUpID); + PageSetTLI(metapg, ThisTimeLineID); } page = BufferGetPage(pbuf); PageSetLSN(page, recptr); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); page = BufferGetPage(rbuf); PageSetLSN(page, recptr); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); page = BufferGetPage(buf); PageSetLSN(page, recptr); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); if (BufferIsValid(lbuf)) { page = BufferGetPage(lbuf); PageSetLSN(page, recptr); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); } } diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index d2bafb3957..28f147fcaf 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -56,7 +56,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.84 2004/07/19 02:47:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.85 2004/07/21 22:31:20 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -299,14 +299,14 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno) recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata); PageSetLSN(page, recptr); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); END_CRIT_SECTION(); } else { - /* Leave the page LSN zero if not WAL-logged, but set SUI anyway */ - PageSetSUI(page, ThisStartUpID); + /* Leave the page LSN zero if not WAL-logged, but set TLI anyway */ + PageSetTLI(page, ThisTimeLineID); } /* diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index 73e2ddf287..2befeb1aa3 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.15 2004/07/11 18:01:45 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.16 2004/07/21 22:31:20 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -136,7 +136,7 @@ _bt_restore_meta(Relation reln, XLogRecPtr lsn, pageop->btpo_flags = BTP_META; PageSetLSN(metapg, lsn); - PageSetSUI(metapg, ThisStartUpID); + PageSetTLI(metapg, ThisTimeLineID); LockBuffer(metabuf, BUFFER_LOCK_UNLOCK); WriteBuffer(metabuf); } @@ -197,7 +197,7 @@ btree_xlog_insert(bool redo, bool isleaf, bool ismeta, elog(PANIC, "btree_insert_redo: failed to add item"); PageSetLSN(page, lsn); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); WriteBuffer(buffer); } @@ -281,7 +281,7 @@ btree_xlog_split(bool redo, bool onleft, bool isroot, xlrec->leftlen); PageSetLSN(page, lsn); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); WriteBuffer(buffer); } @@ -317,7 +317,7 @@ btree_xlog_split(bool redo, bool onleft, bool isroot, record->xl_len - SizeOfBtreeSplit - xlrec->leftlen); PageSetLSN(page, lsn); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); WriteBuffer(buffer); } @@ -353,7 +353,7 @@ btree_xlog_split(bool redo, bool onleft, bool isroot, pageop->btpo_prev = rightsib; PageSetLSN(page, lsn); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); WriteBuffer(buffer); } @@ -420,7 +420,7 @@ btree_xlog_delete(bool redo, XLogRecPtr lsn, XLogRecord *record) } PageSetLSN(page, lsn); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); WriteBuffer(buffer); } @@ -489,7 +489,7 @@ btree_xlog_delete_page(bool redo, bool ismeta, } PageSetLSN(page, lsn); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); WriteBuffer(buffer); } @@ -515,7 +515,7 @@ btree_xlog_delete_page(bool redo, bool ismeta, pageop->btpo_prev = leftsib; PageSetLSN(page, lsn); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); WriteBuffer(buffer); } @@ -543,7 +543,7 @@ btree_xlog_delete_page(bool redo, bool ismeta, pageop->btpo_next = rightsib; PageSetLSN(page, lsn); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); WriteBuffer(buffer); } @@ -569,7 +569,7 @@ btree_xlog_delete_page(bool redo, bool ismeta, pageop->btpo_flags = BTP_DELETED; PageSetLSN(page, lsn); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); WriteBuffer(buffer); } @@ -632,7 +632,7 @@ btree_xlog_newroot(bool redo, XLogRecPtr lsn, XLogRecord *record) record->xl_len - SizeOfBtreeNewroot); PageSetLSN(page, lsn); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); WriteBuffer(buffer); diff --git a/src/backend/access/transam/recovery.conf.sample b/src/backend/access/transam/recovery.conf.sample index 8a0801764c..e3068d535d 100644 --- a/src/backend/access/transam/recovery.conf.sample +++ b/src/backend/access/transam/recovery.conf.sample @@ -63,4 +63,12 @@ # #recovery_target_inclusive = 'true' # 'true' or 'false' # +# +# If you want to recover into a timeline other than the "main line" shown in +# pg_control, specify the timeline number here, or write 'latest' to get +# the latest branch for which there's a history file. +# +#recovery_target_timeline = '33' # number or 'latest' +# +# #--------------------------------------------------------------------------- diff --git a/src/backend/access/transam/rmgr.c b/src/backend/access/transam/rmgr.c index d6c8c93ca6..ad68e4c99b 100644 --- a/src/backend/access/transam/rmgr.c +++ b/src/backend/access/transam/rmgr.c @@ -3,7 +3,7 @@ * * Resource managers definition * - * $PostgreSQL: pgsql/src/backend/access/transam/rmgr.c,v 1.13 2004/07/01 00:49:42 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/rmgr.c,v 1.14 2004/07/21 22:31:20 tgl Exp $ */ #include "postgres.h" @@ -14,12 +14,12 @@ #include "access/rtree.h" #include "access/slru.h" #include "access/xact.h" -#include "access/xlog.h" +#include "access/xlog_internal.h" #include "storage/smgr.h" #include "commands/sequence.h" -RmgrData RmgrTable[RM_MAX_ID + 1] = { +const RmgrData RmgrTable[RM_MAX_ID + 1] = { {"XLOG", xlog_redo, xlog_undo, xlog_desc, NULL, NULL}, {"Transaction", xact_redo, xact_undo, xact_desc, NULL, NULL}, {"Storage", smgr_redo, smgr_undo, smgr_desc, NULL, NULL}, diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index 0181e2d626..d45a7d9f61 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.17 2004/07/01 00:49:42 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.18 2004/07/21 22:31:20 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,6 +22,7 @@ #include "postmaster/bgwriter.h" #include "storage/fd.h" #include "storage/lwlock.h" +#include "storage/shmem.h" #include "miscadmin.h" diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 358966d4fc..40c11fb6bd 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.149 2004/07/19 14:34:39 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.150 2004/07/21 22:31:20 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -24,12 +24,13 @@ #include "access/clog.h" #include "access/subtrans.h" -#include "access/transam.h" #include "access/xact.h" #include "access/xlog.h" +#include "access/xlog_internal.h" #include "access/xlogutils.h" #include "catalog/catversion.h" #include "catalog/pg_control.h" +#include "miscadmin.h" #include "postmaster/bgwriter.h" #include "storage/bufpage.h" #include "storage/fd.h" @@ -41,7 +42,6 @@ #include "utils/builtins.h" #include "utils/guc.h" #include "utils/relcache.h" -#include "miscadmin.h" /* @@ -121,25 +121,57 @@ static int open_sync_bit = DEFAULT_SYNC_FLAGBIT; /* - * ThisStartUpID will be same in all backends --- it identifies current - * instance of the database system. + * ThisTimeLineID will be same in all backends --- it identifies current + * WAL timeline for the database system. */ -StartUpID ThisStartUpID = 0; +TimeLineID ThisTimeLineID = 0; /* Are we doing recovery from XLOG? */ bool InRecovery = false; /* Are we recovering using offline XLOG archives? */ static bool InArchiveRecovery = false; -/* Was the last file restored from archive, or local? */ +/* Was the last xlog file restored from archive, or local? */ static bool restoredFromArchive = false; -static char recoveryRestoreCommand[MAXPGPATH]; +/* options taken from recovery.conf */ +static char *recoveryRestoreCommand = NULL; static bool recoveryTarget = false; static bool recoveryTargetExact = false; static bool recoveryTargetInclusive = true; static TransactionId recoveryTargetXid; static time_t recoveryTargetTime; +/* if recoveryStopsHere returns true, it saves actual stop xid/time here */ +static TransactionId recoveryStopXid; +static time_t recoveryStopTime; +static bool recoveryStopAfter; + +/* + * During normal operation, the only timeline we care about is ThisTimeLineID. + * During recovery, however, things are more complicated. To simplify life + * for rmgr code, we keep ThisTimeLineID set to the "current" timeline as we + * scan through the WAL history (that is, it is the line that was active when + * the currently-scanned WAL record was generated). We also need these + * timeline values: + * + * recoveryTargetTLI: the desired timeline that we want to end in. + * + * expectedTLIs: an integer list of recoveryTargetTLI and the TLIs of + * its known parents, newest first (so recoveryTargetTLI is always the + * first list member). Only these TLIs are expected to be seen in the WAL + * segments we read, and indeed only these TLIs will be considered as + * candidate WAL files to open at all. + * + * curFileTLI: the TLI appearing in the name of the current input WAL file. + * (This is not necessarily the same as ThisTimeLineID, because we could + * be scanning data that was copied from an ancestor timeline when the current + * file was created.) During a sequential scan we do not allow this value + * to decrease. + */ +static TimeLineID recoveryTargetTLI; +static List *expectedTLIs; +static TimeLineID curFileTLI; + /* * MyLastRecPtr points to the start of the last XLOG record inserted by the * current transaction. If MyLastRecPtr.xrecoff == 0, then the current @@ -242,12 +274,19 @@ static XLogRecPtr RedoRecPtr; * *---------- */ + typedef struct XLogwrtRqst { XLogRecPtr Write; /* last byte + 1 to write out */ XLogRecPtr Flush; /* last byte + 1 to flush */ } XLogwrtRqst; +typedef struct XLogwrtResult +{ + XLogRecPtr Write; /* last byte + 1 written out */ + XLogRecPtr Flush; /* last byte + 1 flushed */ +} XLogwrtResult; + /* * Shared state data for XLogInsert. */ @@ -293,7 +332,7 @@ typedef struct XLogCtlData XLogRecPtr *xlblocks; /* 1st byte ptr-s + BLCKSZ */ uint32 XLogCacheByte; /* # bytes in xlog buffers */ uint32 XLogCacheBlck; /* highest allocated xlog buffer index */ - StartUpID ThisStartUpID; + TimeLineID ThisTimeLineID; slock_t info_lck; /* locks shared LogwrtRqst/LogwrtResult */ } XLogCtlData; @@ -323,99 +362,15 @@ static ControlFileData *ControlFile = NULL; XLogCtl->xlblocks[curridx].xrecoff - INSERT_FREESPACE(Insert) \ ) - -/* Increment an xlogid/segment pair */ -#define NextLogSeg(logId, logSeg) \ - do { \ - if ((logSeg) >= XLogSegsPerFile-1) \ - { \ - (logId)++; \ - (logSeg) = 0; \ - } \ - else \ - (logSeg)++; \ - } while (0) - -/* Decrement an xlogid/segment pair (assume it's not 0,0) */ -#define PrevLogSeg(logId, logSeg) \ - do { \ - if (logSeg) \ - (logSeg)--; \ - else \ - { \ - (logId)--; \ - (logSeg) = XLogSegsPerFile-1; \ - } \ - } while (0) - -/* - * Compute ID and segment from an XLogRecPtr. - * - * For XLByteToSeg, do the computation at face value. For XLByteToPrevSeg, - * a boundary byte is taken to be in the previous segment. This is suitable - * for deciding which segment to write given a pointer to a record end, - * for example. (We can assume xrecoff is not zero, since no valid recptr - * can have that.) - */ -#define XLByteToSeg(xlrp, logId, logSeg) \ - ( logId = (xlrp).xlogid, \ - logSeg = (xlrp).xrecoff / XLogSegSize \ - ) -#define XLByteToPrevSeg(xlrp, logId, logSeg) \ - ( logId = (xlrp).xlogid, \ - logSeg = ((xlrp).xrecoff - 1) / XLogSegSize \ - ) - -/* - * Is an XLogRecPtr within a particular XLOG segment? - * - * For XLByteInSeg, do the computation at face value. For XLByteInPrevSeg, - * a boundary byte is taken to be in the previous segment. - */ -#define XLByteInSeg(xlrp, logId, logSeg) \ - ((xlrp).xlogid == (logId) && \ - (xlrp).xrecoff / XLogSegSize == (logSeg)) - -#define XLByteInPrevSeg(xlrp, logId, logSeg) \ - ((xlrp).xlogid == (logId) && \ - ((xlrp).xrecoff - 1) / XLogSegSize == (logSeg)) - - #define PrevBufIdx(idx) \ (((idx) == 0) ? XLogCtl->XLogCacheBlck : ((idx) - 1)) #define NextBufIdx(idx) \ (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1)) -#define XRecOffIsValid(xrecoff) \ - ((xrecoff) % BLCKSZ >= SizeOfXLogPHD && \ - (BLCKSZ - (xrecoff) % BLCKSZ) >= SizeOfXLogRecord) - -/* - * These macros encapsulate knowledge about the exact layout of XLog file - * names as well as archive-status file names. - */ -#define MAXFNAMELEN 32 - -#define XLogFileName(fname, log, seg) \ - snprintf(fname, MAXFNAMELEN, "%08X%08X", log, seg) - -#define XLogFilePath(path, log, seg) \ - snprintf(path, MAXPGPATH, "%s/%08X%08X", XLogDir, log, seg) - -#define StatusFilePath(path, xlog, suffix) \ - snprintf(path, MAXPGPATH, "%s/archive_status/%s%s", XLogDir, xlog, suffix) - -/* - * _INTL_MAXLOGRECSZ: max space needed for a record including header and - * any backup-block data. - */ -#define _INTL_MAXLOGRECSZ (SizeOfXLogRecord + MAXLOGRECSZ + \ - XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ)) - /* File path names */ -static char XLogDir[MAXPGPATH]; +char XLogDir[MAXPGPATH]; static char ControlFilePath[MAXPGPATH]; /* @@ -453,36 +408,44 @@ static char *readBuf = NULL; static XLogRecPtr ReadRecPtr; static XLogRecPtr EndRecPtr; static XLogRecord *nextRecord = NULL; -static StartUpID lastReadSUI; +static TimeLineID lastPageTLI = 0; static bool InRedo = false; + static void XLogArchiveNotify(const char *xlog); static void XLogArchiveNotifySeg(uint32 log, uint32 seg); static bool XLogArchiveIsDone(const char *xlog); static void XLogArchiveCleanup(const char *xlog); static void readRecoveryCommandFile(void); -static void exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, - uint32 xrecoff); +static void exitArchiveRecovery(TimeLineID endTLI, + uint32 endLogId, uint32 endLogSeg); static bool recoveryStopsHere(XLogRecord *record, bool *includeThis); static bool AdvanceXLInsertBuffer(void); -static bool WasteXLInsertBuffer(void); static void XLogWrite(XLogwrtRqst WriteRqst); static int XLogFileInit(uint32 log, uint32 seg, bool *use_existent, bool use_lock); static bool InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath, bool find_free, int max_advance, bool use_lock); -static int XLogFileOpen(uint32 log, uint32 seg, bool econt); -static void RestoreArchivedXLog(char *path, uint32 log, uint32 seg); +static int XLogFileOpen(uint32 log, uint32 seg); +static int XLogFileRead(uint32 log, uint32 seg, int emode); +static bool RestoreArchivedFile(char *path, const char *xlogfname, + const char *recovername); static void PreallocXlogFiles(XLogRecPtr endptr); static void MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr); static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer); -static bool ValidXLOGHeader(XLogPageHeader hdr, int emode, bool checkSUI); +static bool ValidXLOGHeader(XLogPageHeader hdr, int emode); static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt, char *buffer); +static List *readTimeLineHistory(TimeLineID targetTLI); +static bool existsTimeLineHistory(TimeLineID probeTLI); +static TimeLineID findNewestTimeLine(TimeLineID startTLI); +static void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, + TimeLineID endTLI, + uint32 endLogId, uint32 endLogSeg); static void WriteControlFile(void); static void ReadControlFile(void); static char *str_time(time_t tnow); @@ -546,7 +509,7 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata) if (IsBootstrapProcessingMode() && rmid != RM_XLOG_ID) { RecPtr.xlogid = 0; - RecPtr.xrecoff = SizeOfXLogPHD; /* start of 1st checkpoint record */ + RecPtr.xrecoff = SizeOfXLogLongPHD; /* start of 1st chkpt record */ return (RecPtr); } @@ -755,16 +718,9 @@ begin:; } /* - * Determine exactly where we will place the new XLOG record. If there - * isn't enough space on the current XLOG page for a record header, - * advance to the next page (leaving the unused space as zeroes). - * If there isn't enough space in the current XLOG segment for the whole - * record, advance to the next segment (inserting wasted-space records). - * This avoids needing a continuation record at the start of a segment - * file, which would conflict with placing a FILE_HEADER record there. - * We assume that no XLOG record can be larger than a segment file... + * If there isn't enough space on the current XLOG page for a record + * header, advance to the next page (leaving the unused space as zeroes). */ - updrqst = false; freespace = INSERT_FREESPACE(Insert); if (freespace < SizeOfXLogRecord) @@ -773,27 +729,6 @@ begin:; freespace = INSERT_FREESPACE(Insert); } - if (freespace < (uint32) (SizeOfXLogRecord + write_len)) - { - /* Doesn't fit on this page, so check for overrunning the file */ - uint32 avail; - - /* First figure the space available in remaining pages of file */ - avail = XLogSegSize - BLCKSZ - - (Insert->currpage->xlp_pageaddr.xrecoff % XLogSegSize); - avail /= BLCKSZ; /* convert to pages, then usable bytes */ - avail *= (BLCKSZ - SizeOfXLogPHD - SizeOfXLogContRecord); - avail += freespace; /* add in the current page too */ - if (avail < (uint32) (SizeOfXLogRecord + write_len)) - { - /* It overruns the file, so waste the rest of the file... */ - do { - updrqst = WasteXLInsertBuffer(); - } while ((Insert->currpage->xlp_pageaddr.xrecoff % XLogSegSize) != 0); - freespace = INSERT_FREESPACE(Insert); - } - } - curridx = Insert->curridx; record = (XLogRecord *) Insert->currpos; @@ -891,14 +826,12 @@ begin:; /* Use next buffer */ updrqst = AdvanceXLInsertBuffer(); curridx = Insert->curridx; - /* This assert checks we did not insert a file header record */ - Assert(INSERT_FREESPACE(Insert) == BLCKSZ - SizeOfXLogPHD); /* Insert cont-record header */ Insert->currpage->xlp_info |= XLP_FIRST_IS_CONTRECORD; contrecord = (XLogContRecord *) Insert->currpos; contrecord->xl_rem_len = write_len; Insert->currpos += SizeOfXLogContRecord; - freespace = BLCKSZ - SizeOfXLogPHD - SizeOfXLogContRecord; + freespace = INSERT_FREESPACE(Insert); } /* Ensure next record will be properly aligned */ @@ -949,9 +882,9 @@ begin:; * Create an archive notification file * * The name of the notification file is the message that will be picked up - * by the archiver, e.g. we write 00000001000000C6.ready - * and the archiver then knows to archive XLogDir/00000001000000C6, - * then when complete, rename it to 00000001000000C6.done + * by the archiver, e.g. we write 0000000100000001000000C6.ready + * and the archiver then knows to archive XLogDir/0000000100000001000000C6, + * then when complete, rename it to 0000000100000001000000C6.done */ static void XLogArchiveNotify(const char *xlog) @@ -990,7 +923,7 @@ XLogArchiveNotifySeg(uint32 log, uint32 seg) { char xlog[MAXFNAMELEN]; - XLogFileName(xlog, log, seg); + XLogFileName(xlog, ThisTimeLineID, log, seg); XLogArchiveNotify(xlog); } @@ -1035,16 +968,22 @@ XLogArchiveIsDone(const char *xlog) /* * XLogArchiveCleanup * - * Cleanup an archive notification file for a particular xlog segment + * Cleanup archive notification file(s) for a particular xlog segment */ static void XLogArchiveCleanup(const char *xlog) { char archiveStatusPath[MAXPGPATH]; + /* Remove the .done file */ StatusFilePath(archiveStatusPath, xlog, ".done"); unlink(archiveStatusPath); /* should we complain about failure? */ + + /* Remove the .ready file if present --- normally it shouldn't be */ + StatusFilePath(archiveStatusPath, xlog, ".ready"); + unlink(archiveStatusPath); + /* should we complain about failure? */ } /* @@ -1151,7 +1090,7 @@ AdvanceXLInsertBuffer(void) NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * BLCKSZ); Insert->curridx = nextidx; Insert->currpage = NewPage; - Insert->currpos = ((char *) NewPage) + SizeOfXLogPHD; + Insert->currpos = ((char *) NewPage) + SizeOfXLogShortPHD; /* * Be sure to re-zero the buffer so that bytes beyond what we've @@ -1164,103 +1103,26 @@ AdvanceXLInsertBuffer(void) */ NewPage->xlp_magic = XLOG_PAGE_MAGIC; /* NewPage->xlp_info = 0; */ /* done by memset */ - NewPage->xlp_sui = ThisStartUpID; + NewPage->xlp_tli = ThisTimeLineID; NewPage->xlp_pageaddr.xlogid = NewPageEndPtr.xlogid; NewPage->xlp_pageaddr.xrecoff = NewPageEndPtr.xrecoff - BLCKSZ; /* - * If first page of an XLOG segment file, add a FILE_HEADER record. + * If first page of an XLOG segment file, make it a long header. */ if ((NewPage->xlp_pageaddr.xrecoff % XLogSegSize) == 0) { - XLogRecPtr RecPtr; - XLogRecord *record; - XLogFileHeaderData *fhdr; - crc64 crc; + XLogLongPageHeader NewLongPage = (XLogLongPageHeader) NewPage; - record = (XLogRecord *) Insert->currpos; - record->xl_prev = Insert->PrevRecord; - record->xl_xact_prev.xlogid = 0; - record->xl_xact_prev.xrecoff = 0; - record->xl_xid = InvalidTransactionId; - record->xl_len = SizeOfXLogFHD; - record->xl_info = XLOG_FILE_HEADER; - record->xl_rmid = RM_XLOG_ID; - fhdr = (XLogFileHeaderData *) XLogRecGetData(record); - fhdr->xlfhd_sysid = ControlFile->system_identifier; - fhdr->xlfhd_xlogid = NewPage->xlp_pageaddr.xlogid; - fhdr->xlfhd_segno = NewPage->xlp_pageaddr.xrecoff / XLogSegSize; - fhdr->xlfhd_seg_size = XLogSegSize; - - INIT_CRC64(crc); - COMP_CRC64(crc, fhdr, SizeOfXLogFHD); - COMP_CRC64(crc, (char *) record + sizeof(crc64), - SizeOfXLogRecord - sizeof(crc64)); - FIN_CRC64(crc); - record->xl_crc = crc; - - /* Compute record's XLOG location */ - INSERT_RECPTR(RecPtr, Insert, nextidx); - - /* Record begin of record in appropriate places */ - Insert->PrevRecord = RecPtr; - - Insert->currpos += SizeOfXLogRecord + SizeOfXLogFHD; + NewLongPage->xlp_sysid = ControlFile->system_identifier; + NewLongPage->xlp_seg_size = XLogSegSize; + NewPage->xlp_info |= XLP_LONG_HEADER; + Insert->currpos = ((char *) NewPage) + SizeOfXLogLongPHD; } return update_needed; } -/* - * Fill the remainder of the current XLOG page with an XLOG_WASTED_SPACE - * record, and advance to the next page. This has the same calling and - * result conditions as AdvanceXLInsertBuffer, except that - * AdvanceXLInsertBuffer expects the current page to be already filled. - */ -static bool -WasteXLInsertBuffer(void) -{ - XLogCtlInsert *Insert = &XLogCtl->Insert; - XLogRecord *record; - XLogRecPtr RecPtr; - uint32 freespace; - uint16 curridx; - crc64 rdata_crc; - - freespace = INSERT_FREESPACE(Insert); - Assert(freespace >= SizeOfXLogRecord); - freespace -= SizeOfXLogRecord; - - curridx = Insert->curridx; - record = (XLogRecord *) Insert->currpos; - - record->xl_prev = Insert->PrevRecord; - record->xl_xact_prev.xlogid = 0; - record->xl_xact_prev.xrecoff = 0; - - record->xl_xid = InvalidTransactionId; - record->xl_len = freespace; - record->xl_info = XLOG_WASTED_SPACE; - record->xl_rmid = RM_XLOG_ID; - - INIT_CRC64(rdata_crc); - COMP_CRC64(rdata_crc, XLogRecGetData(record), freespace); - COMP_CRC64(rdata_crc, (char *) record + sizeof(crc64), - SizeOfXLogRecord - sizeof(crc64)); - FIN_CRC64(rdata_crc); - record->xl_crc = rdata_crc; - - /* Compute record's XLOG location */ - INSERT_RECPTR(RecPtr, Insert, curridx); - - /* Record begin of record in appropriate places */ - Insert->PrevRecord = RecPtr; - - /* We needn't bother to advance Insert->currpos */ - - return AdvanceXLInsertBuffer(); -} - /* * Write and/or fsync the log at least as far as WriteRqst indicates. * @@ -1355,7 +1217,7 @@ XLogWrite(XLogwrtRqst WriteRqst) if (openLogFile < 0) { XLByteToPrevSeg(LogwrtResult.Write, openLogId, openLogSeg); - openLogFile = XLogFileOpen(openLogId, openLogSeg, false); + openLogFile = XLogFileOpen(openLogId, openLogSeg); openLogOff = 0; } @@ -1439,7 +1301,7 @@ XLogWrite(XLogwrtRqst WriteRqst) if (openLogFile < 0) { XLByteToPrevSeg(LogwrtResult.Write, openLogId, openLogSeg); - openLogFile = XLogFileOpen(openLogId, openLogSeg, false); + openLogFile = XLogFileOpen(openLogId, openLogSeg); openLogOff = 0; } issue_xlog_fsync(); @@ -1617,7 +1479,7 @@ XLogFileInit(uint32 log, uint32 seg, int fd; int nbytes; - XLogFilePath(path, log, seg); + XLogFilePath(path, ThisTimeLineID, log, seg); /* * Try to use existent file (checkpoint maker may have created it @@ -1730,6 +1592,109 @@ XLogFileInit(uint32 log, uint32 seg, return (fd); } +/* + * Create a new XLOG file segment by copying a pre-existing one. + * + * log, seg: identify segment to be created. + * + * srcTLI, srclog, srcseg: identify segment to be copied (could be from + * a different timeline) + * + * Currently this is only used during recovery, and so there are no locking + * considerations. But we should be just as tense as XLogFileInit to avoid + * emplacing a bogus file. + */ +static void +XLogFileCopy(uint32 log, uint32 seg, + TimeLineID srcTLI, uint32 srclog, uint32 srcseg) +{ + char path[MAXPGPATH]; + char tmppath[MAXPGPATH]; + char buffer[BLCKSZ]; + int srcfd; + int fd; + int nbytes; + + /* + * Open the source file + */ + XLogFilePath(path, srcTLI, srclog, srcseg); + srcfd = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0); + if (srcfd < 0) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", path))); + + /* + * Copy into a temp file name. + */ + snprintf(tmppath, MAXPGPATH, "%s/xlogtemp.%d", + XLogDir, (int) getpid()); + + unlink(tmppath); + + /* do not use XLOG_SYNC_BIT here --- want to fsync only at end of fill */ + fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, + S_IRUSR | S_IWUSR); + if (fd < 0) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not create file \"%s\": %m", tmppath))); + + /* + * Do the data copying. + */ + for (nbytes = 0; nbytes < XLogSegSize; nbytes += sizeof(buffer)) + { + errno = 0; + if ((int) read(srcfd, buffer, sizeof(buffer)) != (int) sizeof(buffer)) + { + if (errno != 0) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", path))); + else + ereport(PANIC, + (errmsg("insufficient data in file \"%s\"", path))); + } + errno = 0; + if ((int) write(fd, buffer, sizeof(buffer)) != (int) sizeof(buffer)) + { + int save_errno = errno; + + /* + * If we fail to make the file, delete it to release disk + * space + */ + unlink(tmppath); + /* if write didn't set errno, assume problem is no disk space */ + errno = save_errno ? save_errno : ENOSPC; + + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not write to file \"%s\": %m", tmppath))); + } + } + + if (pg_fsync(fd) != 0) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", tmppath))); + + if (close(fd)) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not close file \"%s\": %m", tmppath))); + + close(srcfd); + + /* + * Now move the segment into place with its final name. + */ + if (!InstallXLogFileSegment(log, seg, tmppath, false, 0, false)) + elog(PANIC, "InstallXLogFileSegment should not have failed"); +} + /* * Install a new XLOG segment file as a current or future log segment. * @@ -1763,7 +1728,7 @@ InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath, char path[MAXPGPATH]; struct stat stat_buf; - XLogFilePath(path, log, seg); + XLogFilePath(path, ThisTimeLineID, log, seg); /* * We want to be sure that only one process does this at a time. @@ -1789,7 +1754,7 @@ InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath, return false; } NextLogSeg(log, seg); - XLogFilePath(path, log, seg); + XLogFilePath(path, ThisTimeLineID, log, seg); } } @@ -1820,73 +1785,102 @@ InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath, } /* - * Open a pre-existing logfile segment. + * Open a pre-existing logfile segment for writing. */ static int -XLogFileOpen(uint32 log, uint32 seg, bool econt) +XLogFileOpen(uint32 log, uint32 seg) { char path[MAXPGPATH]; int fd; - if (InArchiveRecovery) - RestoreArchivedXLog(path, log, seg); - else - XLogFilePath(path, log, seg); + XLogFilePath(path, ThisTimeLineID, log, seg); fd = BasicOpenFile(path, O_RDWR | PG_BINARY | XLOG_SYNC_BIT, S_IRUSR | S_IWUSR); if (fd < 0) - { - if (econt && errno == ENOENT) - { - ereport(LOG, - (errcode_for_file_access(), - errmsg("could not open file \"%s\" (log file %u, segment %u): %m", - path, log, seg))); - return (fd); - } ereport(PANIC, (errcode_for_file_access(), errmsg("could not open file \"%s\" (log file %u, segment %u): %m", path, log, seg))); - } + + return fd; +} + +/* + * Open a logfile segment for reading (during recovery). + */ +static int +XLogFileRead(uint32 log, uint32 seg, int emode) +{ + char path[MAXPGPATH]; + char xlogfname[MAXFNAMELEN]; + ListCell *cell; + int fd; /* - * XXX this is a pretty horrid hack. Remove after implementing timelines. - * - * if we switched back to local xlogs after having been - * restoring from archive, we need to make sure that the - * local files don't get removed by end-of-recovery checkpoint - * in case we need to re-run the recovery + * Loop looking for a suitable timeline ID: we might need to + * read any of the timelines listed in expectedTLIs. * - * we want to copy these away as soon as possible, so set - * the archive status flag to .ready for them - * in case admin isn't cautious enough to have done this anyway - * - * XXX this is completely broken, because there is no guarantee this file - * is actually complete and ready to be archived. Also, what if there's - * a .done file for them? + * We expect curFileTLI on entry to be the TLI of the preceding file + * in sequence, or 0 if there was no predecessor. We do not allow + * curFileTLI to go backwards; this prevents us from picking up the + * wrong file when a parent timeline extends to higher segment numbers + * than the child we want to read. */ - if (InArchiveRecovery && !restoredFromArchive) - XLogArchiveNotifySeg(log, seg); + foreach(cell, expectedTLIs) + { + TimeLineID tli = (TimeLineID) lfirst_int(cell); - return (fd); + if (tli < curFileTLI) + break; /* don't bother looking at too-old TLIs */ + + if (InArchiveRecovery) + { + XLogFileName(xlogfname, tli, log, seg); + restoredFromArchive = RestoreArchivedFile(path, xlogfname, + "RECOVERYXLOG"); + } + else + XLogFilePath(path, tli, log, seg); + + fd = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0); + if (fd >= 0) + { + /* Success! */ + curFileTLI = tli; + return fd; + } + if (errno != ENOENT) /* unexpected failure? */ + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not open file \"%s\" (log file %u, segment %u): %m", + path, log, seg))); + } + + /* Couldn't find it. For simplicity, complain about front timeline */ + XLogFilePath(path, recoveryTargetTLI, log, seg); + errno = ENOENT; + ereport(emode, + (errcode_for_file_access(), + errmsg("could not open file \"%s\" (log file %u, segment %u): %m", + path, log, seg))); + return -1; } /* - * Get next logfile segment when using off-line archive for recovery - * - * Attempt to retrieve the specified segment from off-line archival storage. + * Attempt to retrieve the specified file from off-line archival storage. * If successful, fill "path" with its complete path (note that this will be - * a temp file name that doesn't follow the normal naming convention). + * a temp file name that doesn't follow the normal naming convention), and + * return TRUE. * - * If not successful, fill "path" with the name of the normal on-line segment - * file (which may or may not actually exist, but we'll try to use it). + * If not successful, fill "path" with the name of the normal on-line file + * (which may or may not actually exist, but we'll try to use it), and return + * FALSE. */ -static void -RestoreArchivedXLog(char *path, uint32 log, uint32 seg) +static bool +RestoreArchivedFile(char *path, const char *xlogfname, + const char *recovername) { - char xlogfname[MAXFNAMELEN]; char xlogpath[MAXPGPATH]; char xlogRestoreCmd[MAXPGPATH]; char *dp; @@ -1919,11 +1913,10 @@ RestoreArchivedXLog(char *path, uint32 log, uint32 seg) * The copy-from-archive filename is always the same, ensuring that we * don't run out of disk space on long recoveries. */ - XLogFileName(xlogfname, log, seg); - snprintf(xlogpath, MAXPGPATH, "%s/RECOVERYXLOG", XLogDir); + snprintf(xlogpath, MAXPGPATH, "%s/%s", XLogDir, recovername); /* - * Make sure there is no existing RECOVERYXLOG file. + * Make sure there is no existing file named recovername. */ if (stat(xlogpath, &stat_buf) != 0) { @@ -2004,8 +1997,7 @@ RestoreArchivedXLog(char *path, uint32 log, uint32 seg) (errmsg("restored log file \"%s\" from archive", xlogfname))); strcpy(path, xlogpath); - restoredFromArchive = true; - return; + return true; } if (errno != ENOENT) ereport(FATAL, @@ -2033,8 +2025,8 @@ RestoreArchivedXLog(char *path, uint32 log, uint32 seg) * In many recovery scenarios we expect this to fail also, but * if so that just means we've reached the end of WAL. */ - XLogFilePath(path, log, seg); - restoredFromArchive = false; + snprintf(path, MAXPGPATH, "%s/%s", XLogDir, xlogfname); + return false; } /* @@ -2085,18 +2077,25 @@ MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr) errmsg("could not open transaction log directory \"%s\": %m", XLogDir))); - XLogFileName(lastoff, log, seg); + XLogFileName(lastoff, ThisTimeLineID, log, seg); errno = 0; while ((xlde = readdir(xldir)) != NULL) { /* - * use the alphanumeric sorting property of the filenames to decide - * which ones are earlier than the lastoff segment + * We ignore the timeline part of the XLOG segment identifiers in + * deciding whether a segment is still needed. This ensures that + * we won't prematurely remove a segment from a parent timeline. + * We could probably be a little more proactive about removing + * segments of non-parent timelines, but that would be a whole lot + * more complicated. + * + * We use the alphanumeric sorting property of the filenames to decide + * which ones are earlier than the lastoff segment. */ - if (strlen(xlde->d_name) == 16 && - strspn(xlde->d_name, "0123456789ABCDEF") == 16 && - strcmp(xlde->d_name, lastoff) <= 0) + if (strlen(xlde->d_name) == 24 && + strspn(xlde->d_name, "0123456789ABCDEF") == 24 && + strcmp(xlde->d_name + 8, lastoff + 8) <= 0) { bool recycle; @@ -2185,7 +2184,7 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn) page = (Page) BufferGetPage(buffer); memcpy((char *) page, blk, BLCKSZ); PageSetLSN(page, lsn); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); WriteBuffer(buffer); } @@ -2272,11 +2271,13 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer) { XLogRecord *record; XLogRecPtr tmpRecPtr = EndRecPtr; + bool randAccess = false; uint32 len, total_len; uint32 targetPageOff; + uint32 targetRecOff; + uint32 pageHeaderSize; unsigned i; - bool nextmode = false; if (readBuf == NULL) { @@ -2295,7 +2296,6 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer) if (RecPtr == NULL) { RecPtr = &tmpRecPtr; - nextmode = true; /* fast case if next record is on same page */ if (nextRecord != NULL) { @@ -2310,12 +2310,24 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer) (tmpRecPtr.xlogid)++; tmpRecPtr.xrecoff = 0; } - tmpRecPtr.xrecoff += SizeOfXLogPHD; + /* We will account for page header size below */ + } + else + { + if (!XRecOffIsValid(RecPtr->xrecoff)) + ereport(PANIC, + (errmsg("invalid record offset at %X/%X", + RecPtr->xlogid, RecPtr->xrecoff))); + /* + * Since we are going to a random position in WAL, forget any + * prior state about what timeline we were in, and allow it + * to be any timeline in expectedTLIs. We also set a flag to + * allow curFileTLI to go backwards (but we can't reset that + * variable right here, since we might not change files at all). + */ + lastPageTLI = 0; /* see comment in ValidXLOGHeader */ + randAccess = true; /* allow curFileTLI to go backwards too */ } - else if (!XRecOffIsValid(RecPtr->xrecoff)) - ereport(PANIC, - (errmsg("invalid record offset at %X/%X", - RecPtr->xlogid, RecPtr->xrecoff))); if (readFile >= 0 && !XLByteInSeg(*RecPtr, readId, readSeg)) { @@ -2325,7 +2337,11 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer) XLByteToSeg(*RecPtr, readId, readSeg); if (readFile < 0) { - readFile = XLogFileOpen(readId, readSeg, (emode == LOG)); + /* Now it's okay to reset curFileTLI if random fetch */ + if (randAccess) + curFileTLI = 0; + + readFile = XLogFileRead(readId, readSeg, emode); if (readFile < 0) goto next_record_is_invalid; readOff = (uint32) (-1); /* force read to occur below */ @@ -2351,11 +2367,30 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer) readId, readSeg, readOff))); goto next_record_is_invalid; } - if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode, nextmode)) + if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode)) goto next_record_is_invalid; } + pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf); + targetRecOff = RecPtr->xrecoff % BLCKSZ; + if (targetRecOff == 0) + { + /* + * Can only get here in the continuing-from-prev-page case, because + * XRecOffIsValid eliminated the zero-page-offset case otherwise. + * Need to skip over the new page's header. + */ + tmpRecPtr.xrecoff += pageHeaderSize; + targetRecOff = pageHeaderSize; + } + else if (targetRecOff < pageHeaderSize) + { + ereport(emode, + (errmsg("invalid record offset at %X/%X", + RecPtr->xlogid, RecPtr->xrecoff))); + goto next_record_is_invalid; + } if ((((XLogPageHeader) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) && - RecPtr->xrecoff % BLCKSZ == SizeOfXLogPHD) + targetRecOff == pageHeaderSize) { ereport(emode, (errmsg("contrecord is requested by %X/%X", @@ -2428,7 +2463,7 @@ got_record:; close(readFile); readFile = -1; NextLogSeg(readId, readSeg); - readFile = XLogFileOpen(readId, readSeg, (emode == LOG)); + readFile = XLogFileRead(readId, readSeg, emode); if (readFile < 0) goto next_record_is_invalid; readOff = 0; @@ -2441,7 +2476,7 @@ got_record:; readId, readSeg, readOff))); goto next_record_is_invalid; } - if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode, true)) + if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode)) goto next_record_is_invalid; if (!(((XLogPageHeader) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD)) { @@ -2450,7 +2485,8 @@ got_record:; readId, readSeg, readOff))); goto next_record_is_invalid; } - contrecord = (XLogContRecord *) ((char *) readBuf + SizeOfXLogPHD); + pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf); + contrecord = (XLogContRecord *) ((char *) readBuf + pageHeaderSize); if (contrecord->xl_rem_len == 0 || total_len != (contrecord->xl_rem_len + gotlen)) { @@ -2460,7 +2496,7 @@ got_record:; readId, readSeg, readOff))); goto next_record_is_invalid; } - len = BLCKSZ - SizeOfXLogPHD - SizeOfXLogContRecord; + len = BLCKSZ - pageHeaderSize - SizeOfXLogContRecord; if (contrecord->xl_rem_len > len) { memcpy(buffer, (char *) contrecord + SizeOfXLogContRecord, len); @@ -2474,7 +2510,8 @@ got_record:; } if (!RecordIsValid(record, *RecPtr, emode)) goto next_record_is_invalid; - if (BLCKSZ - SizeOfXLogRecord >= SizeOfXLogPHD + + pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf); + if (BLCKSZ - SizeOfXLogRecord >= pageHeaderSize + SizeOfXLogContRecord + MAXALIGN(contrecord->xl_rem_len)) { nextRecord = (XLogRecord *) ((char *) contrecord + @@ -2482,7 +2519,7 @@ got_record:; } EndRecPtr.xlogid = readId; EndRecPtr.xrecoff = readSeg * XLogSegSize + readOff + - SizeOfXLogPHD + SizeOfXLogContRecord + + pageHeaderSize + SizeOfXLogContRecord + MAXALIGN(contrecord->xl_rem_len); ReadRecPtr = *RecPtr; return record; @@ -2514,7 +2551,7 @@ next_record_is_invalid:; * ReadRecord. It's not intended for use from anywhere else. */ static bool -ValidXLOGHeader(XLogPageHeader hdr, int emode, bool checkSUI) +ValidXLOGHeader(XLogPageHeader hdr, int emode) { XLogRecPtr recaddr; @@ -2532,46 +2569,416 @@ ValidXLOGHeader(XLogPageHeader hdr, int emode, bool checkSUI) hdr->xlp_info, readId, readSeg, readOff))); return false; } - recaddr.xlogid = readId; - recaddr.xrecoff = readSeg * XLogSegSize + readOff; - if (!XLByteEQ(hdr->xlp_pageaddr, recaddr)) + if (hdr->xlp_info & XLP_LONG_HEADER) { - ereport(emode, - (errmsg("unexpected pageaddr %X/%X in log file %u, segment %u, offset %u", - hdr->xlp_pageaddr.xlogid, hdr->xlp_pageaddr.xrecoff, - readId, readSeg, readOff))); - return false; - } + XLogLongPageHeader longhdr = (XLogLongPageHeader) hdr; - /* - * We disbelieve a SUI less than the previous page's SUI, or more than - * a few counts greater. In theory as many as 512 shutdown checkpoint - * records could appear on a 32K-sized xlog page, so that's the most - * differential there could legitimately be. - * - * Note this check can only be applied when we are reading the next page - * in sequence, so ReadRecord passes a flag indicating whether to - * check. - */ - if (checkSUI) - { - if (hdr->xlp_sui < lastReadSUI || - hdr->xlp_sui > lastReadSUI + 512) + if (longhdr->xlp_sysid != ControlFile->system_identifier) { - ereport(emode, - /* translator: SUI = startup id */ - (errmsg("out-of-sequence SUI %u (after %u) in log file %u, segment %u, offset %u", - hdr->xlp_sui, lastReadSUI, - readId, readSeg, readOff))); - return false; - } - } - lastReadSUI = hdr->xlp_sui; - return true; -} + char fhdrident_str[32]; + char sysident_str[32]; -/* - * I/O routines for pg_control + /* + * Format sysids separately to keep platform-dependent format + * code out of the translatable message string. + */ + snprintf(fhdrident_str, sizeof(fhdrident_str), UINT64_FORMAT, + longhdr->xlp_sysid); + snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT, + ControlFile->system_identifier); + ereport(emode, + (errmsg("WAL file is from different system"), + errdetail("WAL file SYSID is %s, pg_control SYSID is %s", + fhdrident_str, sysident_str))); + return false; + } + if (longhdr->xlp_seg_size != XLogSegSize) + { + ereport(emode, + (errmsg("WAL file is from different system"), + errdetail("Incorrect XLOG_SEG_SIZE in page header."))); + return false; + } + } + recaddr.xlogid = readId; + recaddr.xrecoff = readSeg * XLogSegSize + readOff; + if (!XLByteEQ(hdr->xlp_pageaddr, recaddr)) + { + ereport(emode, + (errmsg("unexpected pageaddr %X/%X in log file %u, segment %u, offset %u", + hdr->xlp_pageaddr.xlogid, hdr->xlp_pageaddr.xrecoff, + readId, readSeg, readOff))); + return false; + } + + /* + * Check page TLI is one of the expected values. + */ + if (!list_member_int(expectedTLIs, (int) hdr->xlp_tli)) + { + ereport(emode, + (errmsg("unexpected timeline ID %u in log file %u, segment %u, offset %u", + hdr->xlp_tli, + readId, readSeg, readOff))); + return false; + } + + /* + * Since child timelines are always assigned a TLI greater than their + * immediate parent's TLI, we should never see TLI go backwards across + * successive pages of a consistent WAL sequence. + * + * Of course this check should only be applied when advancing sequentially + * across pages; therefore ReadRecord resets lastPageTLI to zero when + * going to a random page. + */ + if (hdr->xlp_tli < lastPageTLI) + { + ereport(emode, + (errmsg("out-of-sequence timeline ID %u (after %u) in log file %u, segment %u, offset %u", + hdr->xlp_tli, lastPageTLI, + readId, readSeg, readOff))); + return false; + } + lastPageTLI = hdr->xlp_tli; + return true; +} + +/* + * Try to read a timeline's history file. + * + * If successful, return the list of component TLIs (the given TLI followed by + * its ancestor TLIs). If we can't find the history file, assume that the + * timeline has no parents, and return a list of just the specified timeline + * ID. + */ +static List * +readTimeLineHistory(TimeLineID targetTLI) +{ + List *result; + char path[MAXPGPATH]; + char histfname[MAXFNAMELEN]; + char fline[MAXPGPATH]; + FILE *fd; + + if (InArchiveRecovery) + { + TLHistoryFileName(histfname, targetTLI); + RestoreArchivedFile(path, histfname, "RECOVERYHISTORY"); + } + else + TLHistoryFilePath(path, targetTLI); + + fd = AllocateFile(path, "r"); + if (fd == NULL) + { + if (errno != ENOENT) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not open \"%s\": %m", path))); + /* Not there, so assume no parents */ + return list_make1_int((int) targetTLI); + } + + result = NIL; + + /* + * Parse the file... + */ + while (fgets(fline, MAXPGPATH, fd) != NULL) + { + /* skip leading whitespace and check for # comment */ + char *ptr; + char *endptr; + TimeLineID tli; + + for (ptr = fline; *ptr; ptr++) + { + if (!isspace((unsigned char) *ptr)) + break; + } + if (*ptr == '\0' || *ptr == '#') + continue; + + /* expect a numeric timeline ID as first field of line */ + tli = (TimeLineID) strtoul(ptr, &endptr, 0); + if (endptr == ptr) + ereport(FATAL, + (errmsg("syntax error in history file: %s", fline), + errhint("Expected a numeric timeline ID."))); + + if (result && + tli <= (TimeLineID) linitial_int(result)) + ereport(FATAL, + (errmsg("invalid data in history file: %s", fline), + errhint("Timeline IDs must be in increasing sequence."))); + + /* Build list with newest item first */ + result = lcons_int((int) tli, result); + + /* we ignore the remainder of each line */ + } + + FreeFile(fd); + + if (result && + targetTLI <= (TimeLineID) linitial_int(result)) + ereport(FATAL, + (errmsg("invalid data in history file \"%s\"", path), + errhint("Timeline IDs must be less than child timeline's ID."))); + + result = lcons_int((int) targetTLI, result); + + ereport(DEBUG3, + (errmsg_internal("history of timeline %u is %s", + targetTLI, nodeToString(result)))); + + return result; +} + +/* + * Probe whether a timeline history file exists for the given timeline ID + */ +static bool +existsTimeLineHistory(TimeLineID probeTLI) +{ + char path[MAXPGPATH]; + char histfname[MAXFNAMELEN]; + FILE *fd; + + if (InArchiveRecovery) + { + TLHistoryFileName(histfname, probeTLI); + RestoreArchivedFile(path, histfname, "RECOVERYHISTORY"); + } + else + TLHistoryFilePath(path, probeTLI); + + fd = AllocateFile(path, "r"); + if (fd != NULL) + { + FreeFile(fd); + return true; + } + else + { + if (errno != ENOENT) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not open \"%s\": %m", path))); + return false; + } +} + +/* + * Find the newest existing timeline, assuming that startTLI exists. + * + * Note: while this is somewhat heuristic, it does positively guarantee + * that (result + 1) is not a known timeline, and therefore it should + * be safe to assign that ID to a new timeline. + */ +static TimeLineID +findNewestTimeLine(TimeLineID startTLI) +{ + TimeLineID newestTLI; + TimeLineID probeTLI; + + /* + * The algorithm is just to probe for the existence of timeline history + * files. XXX is it useful to allow gaps in the sequence? + */ + newestTLI = startTLI; + + for (probeTLI = startTLI + 1; ; probeTLI++) + { + if (existsTimeLineHistory(probeTLI)) + { + newestTLI = probeTLI; /* probeTLI exists */ + } + else + { + /* doesn't exist, assume we're done */ + break; + } + } + + return newestTLI; +} + +/* + * Create a new timeline history file. + * + * newTLI: ID of the new timeline + * parentTLI: ID of its immediate parent + * endTLI et al: ID of the last used WAL file, for annotation purposes + * + * Currently this is only used during recovery, and so there are no locking + * considerations. But we should be just as tense as XLogFileInit to avoid + * emplacing a bogus file. + */ +static void +writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, + TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg) +{ + char path[MAXPGPATH]; + char tmppath[MAXPGPATH]; + char histfname[MAXFNAMELEN]; + char xlogfname[MAXFNAMELEN]; + char buffer[BLCKSZ]; + int srcfd; + int fd; + int nbytes; + + Assert(newTLI > parentTLI); /* else bad selection of newTLI */ + + /* + * Write into a temp file name. + */ + snprintf(tmppath, MAXPGPATH, "%s/xlogtemp.%d", + XLogDir, (int) getpid()); + + unlink(tmppath); + + /* do not use XLOG_SYNC_BIT here --- want to fsync only at end of fill */ + fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL, + S_IRUSR | S_IWUSR); + if (fd < 0) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not create file \"%s\": %m", tmppath))); + + /* + * If a history file exists for the parent, copy it verbatim + */ + if (InArchiveRecovery) + { + TLHistoryFileName(histfname, parentTLI); + RestoreArchivedFile(path, histfname, "RECOVERYHISTORY"); + } + else + TLHistoryFilePath(path, parentTLI); + + srcfd = BasicOpenFile(path, O_RDONLY, 0); + if (srcfd < 0) + { + if (errno != ENOENT) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not open \"%s\": %m", path))); + /* Not there, so assume parent has no parents */ + } + else + { + for (;;) + { + errno = 0; + nbytes = (int) read(srcfd, buffer, sizeof(buffer)); + if (nbytes < 0 || errno != 0) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", path))); + if (nbytes == 0) + break; + errno = 0; + if ((int) write(fd, buffer, nbytes) != nbytes) + { + int save_errno = errno; + + /* + * If we fail to make the file, delete it to release disk + * space + */ + unlink(tmppath); + /* if write didn't set errno, assume problem is no disk space */ + errno = save_errno ? save_errno : ENOSPC; + + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not write to file \"%s\": %m", tmppath))); + } + } + close(srcfd); + } + + /* + * Append one line with the details of this timeline split. + * + * If we did have a parent file, insert an extra newline just in case + * the parent file failed to end with one. + */ + XLogFileName(xlogfname, endTLI, endLogId, endLogSeg); + + snprintf(buffer, sizeof(buffer), + "%s%u\t%s\t%s transaction %u at %s\n", + (srcfd < 0) ? "" : "\n", + parentTLI, + xlogfname, + recoveryStopAfter ? "after" : "before", + recoveryStopXid, + str_time(recoveryStopTime)); + + nbytes = strlen(buffer); + errno = 0; + if ((int) write(fd, buffer, nbytes) != nbytes) + { + int save_errno = errno; + + /* + * If we fail to make the file, delete it to release disk + * space + */ + unlink(tmppath); + /* if write didn't set errno, assume problem is no disk space */ + errno = save_errno ? save_errno : ENOSPC; + + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not write to file \"%s\": %m", tmppath))); + } + + if (pg_fsync(fd) != 0) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", tmppath))); + + if (close(fd)) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not close file \"%s\": %m", tmppath))); + + + /* + * Now move the completed history file into place with its final name. + */ + TLHistoryFilePath(path, newTLI); + + /* + * Prefer link() to rename() here just to be really sure that we don't + * overwrite an existing logfile. However, there shouldn't be one, so + * rename() is an acceptable substitute except for the truly paranoid. + */ +#if HAVE_WORKING_LINK + if (link(tmppath, path) < 0) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not link file \"%s\" to \"%s\": %m", + tmppath, path))); + unlink(tmppath); +#else + if (rename(tmppath, path) < 0) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not rename file \"%s\" to \"%s\": %m", + tmppath, path))); +#endif + + /* The history file can be archived immediately. */ + TLHistoryFileName(histfname, newTLI); + XLogArchiveNotify(histfname); +} + +/* + * I/O routines for pg_control * * *ControlFile is a buffer in shared memory that holds an image of the * contents of pg_control. WriteControlFile() initializes pg_control @@ -2956,8 +3363,8 @@ BootStrapXLOG(void) CheckPoint checkPoint; char *buffer; XLogPageHeader page; + XLogLongPageHeader longpage; XLogRecord *record; - XLogFileHeaderData *fhdr; bool use_existent; uint64 sysidentifier; struct timeval tv; @@ -2979,6 +3386,9 @@ BootStrapXLOG(void) sysidentifier = ((uint64) tv.tv_sec) << 32; sysidentifier |= (uint32) (tv.tv_sec | tv.tv_usec); + /* First timeline ID is always 1 */ + ThisTimeLineID = 1; + /* Use malloc() to ensure buffer is MAXALIGNED */ buffer = (char *) malloc(BLCKSZ); page = (XLogPageHeader) buffer; @@ -2986,9 +3396,9 @@ BootStrapXLOG(void) /* Set up information for the initial checkpoint record */ checkPoint.redo.xlogid = 0; - checkPoint.redo.xrecoff = SizeOfXLogPHD + SizeOfXLogRecord + SizeOfXLogFHD; + checkPoint.redo.xrecoff = SizeOfXLogLongPHD; checkPoint.undo = checkPoint.redo; - checkPoint.ThisStartUpID = 0; + checkPoint.ThisTimeLineID = ThisTimeLineID; checkPoint.nextXid = FirstNormalTransactionId; checkPoint.nextOid = BootstrapObjectIdData; checkPoint.time = time(NULL); @@ -2999,38 +3409,18 @@ BootStrapXLOG(void) /* Set up the XLOG page header */ page->xlp_magic = XLOG_PAGE_MAGIC; - page->xlp_info = 0; - page->xlp_sui = checkPoint.ThisStartUpID; + page->xlp_info = XLP_LONG_HEADER; + page->xlp_tli = ThisTimeLineID; page->xlp_pageaddr.xlogid = 0; page->xlp_pageaddr.xrecoff = 0; - - /* Insert the file header record */ - record = (XLogRecord *) ((char *) page + SizeOfXLogPHD); - record->xl_prev.xlogid = 0; - record->xl_prev.xrecoff = 0; - record->xl_xact_prev.xlogid = 0; - record->xl_xact_prev.xrecoff = 0; - record->xl_xid = InvalidTransactionId; - record->xl_len = SizeOfXLogFHD; - record->xl_info = XLOG_FILE_HEADER; - record->xl_rmid = RM_XLOG_ID; - fhdr = (XLogFileHeaderData *) XLogRecGetData(record); - fhdr->xlfhd_sysid = sysidentifier; - fhdr->xlfhd_xlogid = 0; - fhdr->xlfhd_segno = 0; - fhdr->xlfhd_seg_size = XLogSegSize; - - INIT_CRC64(crc); - COMP_CRC64(crc, fhdr, SizeOfXLogFHD); - COMP_CRC64(crc, (char *) record + sizeof(crc64), - SizeOfXLogRecord - sizeof(crc64)); - FIN_CRC64(crc); - record->xl_crc = crc; + longpage = (XLogLongPageHeader) page; + longpage->xlp_sysid = sysidentifier; + longpage->xlp_seg_size = XLogSegSize; /* Insert the initial checkpoint record */ - record = (XLogRecord *) ((char *) page + SizeOfXLogPHD + SizeOfXLogRecord + SizeOfXLogFHD); + record = (XLogRecord *) ((char *) page + SizeOfXLogLongPHD); record->xl_prev.xlogid = 0; - record->xl_prev.xrecoff = SizeOfXLogPHD; + record->xl_prev.xrecoff = 0; record->xl_xact_prev.xlogid = 0; record->xl_xact_prev.xrecoff = 0; record->xl_xid = InvalidTransactionId; @@ -3050,7 +3440,7 @@ BootStrapXLOG(void) use_existent = false; openLogFile = XLogFileInit(0, 0, &use_existent, false); - /* Write the first page with the initial records */ + /* Write the first page with the initial record */ errno = 0; if (write(openLogFile, buffer, BLCKSZ) != BLCKSZ) { @@ -3120,6 +3510,8 @@ readRecoveryCommandFile(void) char recoveryCommandFile[MAXPGPATH]; FILE *fd; char cmdline[MAXPGPATH]; + TimeLineID rtli = 0; + bool rtliGiven = false; bool syntaxError = false; snprintf(recoveryCommandFile, MAXPGPATH, "%s/recovery.conf", DataDir); @@ -3177,11 +3569,31 @@ readRecoveryCommandFile(void) } if (strcmp(tok1,"restore_command") == 0) { - StrNCpy(recoveryRestoreCommand, tok2, MAXPGPATH); + recoveryRestoreCommand = pstrdup(tok2); ereport(LOG, (errmsg("restore_command = \"%s\"", recoveryRestoreCommand))); } + else if (strcmp(tok1,"recovery_target_timeline") == 0) { + rtliGiven = true; + if (strcmp(tok2, "latest") == 0) + rtli = 0; + else + { + errno = 0; + rtli = (TimeLineID) strtoul(tok2, NULL, 0); + if (errno == EINVAL || errno == ERANGE) + ereport(FATAL, + (errmsg("recovery_target_timeline is not a valid number: \"%s\"", + tok2))); + } + if (rtli) + ereport(LOG, + (errmsg("recovery_target_timeline = %u", rtli))); + else + ereport(LOG, + (errmsg("recovery_target_timeline = latest"))); + } else if (strcmp(tok1,"recovery_target_xid") == 0) { errno = 0; recoveryTargetXid = (TransactionId) strtoul(tok2, NULL, 0); @@ -3246,22 +3658,44 @@ readRecoveryCommandFile(void) errhint("Lines should have the format parameter = 'value'."))); /* Check that required parameters were supplied */ - if (recoveryRestoreCommand[0] == '\0') + if (recoveryRestoreCommand == NULL) ereport(FATAL, (errmsg("recovery command file \"%s\" did not specify restore_command", recoveryCommandFile))); + /* Enable fetching from archive recovery area */ + InArchiveRecovery = true; + /* - * clearly indicate our state + * If user specified recovery_target_timeline, validate it or compute the + * "latest" value. We can't do this until after we've gotten the restore + * command and set InArchiveRecovery, because we need to fetch timeline + * history files from the archive. */ - InArchiveRecovery = true; + if (rtliGiven) + { + if (rtli) + { + /* Timeline 1 does not have a history file, all else should */ + if (rtli != 1 && !existsTimeLineHistory(rtli)) + ereport(FATAL, + (errmsg("recovery_target_timeline %u does not exist", + rtli))); + recoveryTargetTLI = rtli; + } + else + { + /* We start the "latest" search from pg_control's timeline */ + recoveryTargetTLI = findNewestTimeLine(recoveryTargetTLI); + } + } } /* * Exit archive-recovery state */ static void -exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, uint32 xrecoff) +exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg) { char recoveryPath[MAXPGPATH]; char xlogpath[MAXPGPATH]; @@ -3269,7 +3703,7 @@ exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, uint32 xrecoff) char recoveryCommandDone[MAXPGPATH]; /* - * Disable fetches from archive, so we can use XLogFileOpen below. + * We are no longer in archive recovery state. */ InArchiveRecovery = false; @@ -3294,10 +3728,12 @@ exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, uint32 xrecoff) * more descriptive of what our current database state is, because that * is what we replayed from. * - * XXX there ought to be a timeline increment somewhere around here. + * Note that if we are establishing a new timeline, ThisTimeLineID is + * already set to the new value, and so we will create a new file instead + * of overwriting any existing file. */ snprintf(recoveryPath, MAXPGPATH, "%s/RECOVERYXLOG", XLogDir); - XLogFilePath(xlogpath, endLogId, endLogSeg); + XLogFilePath(xlogpath, ThisTimeLineID, endLogId, endLogSeg); if (restoredFromArchive) { @@ -3319,61 +3755,26 @@ exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, uint32 xrecoff) * RECOVERYXLOG laying about, get rid of it. */ unlink(recoveryPath); /* ignore any error */ + /* + * If we are establishing a new timeline, we have to copy data + * from the last WAL segment of the old timeline to create a + * starting WAL segment for the new timeline. + */ + if (endTLI != ThisTimeLineID) + XLogFileCopy(endLogId, endLogSeg, + endTLI, endLogId, endLogSeg); } /* - * If we restored to a point-in-time, then the current WAL segment - * probably contains records beyond the stop point. These represent an - * extreme hazard: if we crash in the near future, the replay apparatus - * will know no reason why it shouldn't replay them. Therefore, - * explicitly zero out all the remaining pages of the segment. (We need - * not worry about the partial page in which the last record ends, since - * StartUpXlog will handle zeroing that. Also, there's nothing to do - * if we are right at a segment boundary.) - * - * XXX segment files beyond thhe current one also represent a hazard - * for the same reason. Need to invent timelines to fix this. + * Let's just make real sure there are not .ready or .done flags posted + * for the new segment. */ + XLogFileName(xlogpath, ThisTimeLineID, endLogId, endLogSeg); + XLogArchiveCleanup(xlogpath); - /* align xrecoff to next page, then drop segment part */ - if (xrecoff % BLCKSZ != 0) - xrecoff += (BLCKSZ - xrecoff % BLCKSZ); - xrecoff %= XLogSegSize; - - if (recoveryTarget && xrecoff != 0) - { - int fd; - char zbuffer[BLCKSZ]; - - fd = XLogFileOpen(endLogId, endLogSeg, false); - MemSet(zbuffer, 0, sizeof(zbuffer)); - if (lseek(fd, (off_t) xrecoff, SEEK_SET) < 0) - ereport(PANIC, - (errcode_for_file_access(), - errmsg("could not seek in file \"%s\": %m", - xlogpath))); - for (; xrecoff < XLogSegSize; xrecoff += sizeof(zbuffer)) - { - errno = 0; - if ((int) write(fd, zbuffer, sizeof(zbuffer)) != (int) sizeof(zbuffer)) - { - /* if write didn't set errno, assume problem is no disk space */ - if (errno == 0) - errno = ENOSPC; - ereport(PANIC, - (errcode_for_file_access(), - errmsg("could not write to file \"%s\": %m", xlogpath))); - } - } - if (pg_fsync(fd) != 0) - ereport(PANIC, - (errcode_for_file_access(), - errmsg("could not fsync file \"%s\": %m", xlogpath))); - if (close(fd)) - ereport(PANIC, - (errcode_for_file_access(), - errmsg("could not close file \"%s\": %m", xlogpath))); - } + /* Get rid of any remaining recovered timeline-history file, too */ + snprintf(recoveryPath, MAXPGPATH, "%s/RECOVERYHISTORY", XLogDir); + unlink(recoveryPath); /* ignore any error */ /* * Rename the config file out of the way, so that we don't accidentally @@ -3398,6 +3799,8 @@ exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, uint32 xrecoff) * * Returns TRUE if we are stopping, FALSE otherwise. On TRUE return, * *includeThis is set TRUE if we should apply this record before stopping. + * Also, some information is saved in recoveryStopXid et al for use in + * annotating the new timeline's history file. */ static bool recoveryStopsHere(XLogRecord *record, bool *includeThis) @@ -3466,27 +3869,31 @@ recoveryStopsHere(XLogRecord *record, bool *includeThis) if (stopsHere) { + recoveryStopXid = record->xl_xid; + recoveryStopTime = recordXtime; + recoveryStopAfter = *includeThis; + if (record_info == XLOG_XACT_COMMIT) { - if (*includeThis) + if (recoveryStopAfter) ereport(LOG, (errmsg("recovery stopping after commit of transaction %u, time %s", - record->xl_xid, str_time(recordXtime)))); + recoveryStopXid, str_time(recoveryStopTime)))); else ereport(LOG, (errmsg("recovery stopping before commit of transaction %u, time %s", - record->xl_xid, str_time(recordXtime)))); + recoveryStopXid, str_time(recoveryStopTime)))); } else { - if (*includeThis) + if (recoveryStopAfter) ereport(LOG, (errmsg("recovery stopping after abort of transaction %u, time %s", - record->xl_xid, str_time(recordXtime)))); + recoveryStopXid, str_time(recoveryStopTime)))); else ereport(LOG, (errmsg("recovery stopping before abort of transaction %u, time %s", - record->xl_xid, str_time(recordXtime)))); + recoveryStopXid, str_time(recoveryStopTime)))); } } @@ -3502,6 +3909,7 @@ StartupXLOG(void) XLogCtlInsert *Insert; CheckPoint checkPoint; bool wasShutdown; + bool needNewTimeLine = false; XLogRecPtr RecPtr, LastRec, checkPointLoc, @@ -3557,12 +3965,21 @@ StartupXLOG(void) pg_usleep(60000000L); #endif + /* + * Initialize on the assumption we want to recover to the same timeline + * that's active according to pg_control. + */ + recoveryTargetTLI = ControlFile->checkPointCopy.ThisTimeLineID; + /* * Check for recovery control file, and if so set up state for * offline recovery */ readRecoveryCommandFile(); + /* Now we can determine the list of expected TLIs */ + expectedTLIs = readTimeLineHistory(recoveryTargetTLI); + /* * Get the last valid checkpoint record. If the latest one according * to pg_control is broken, try the next-to-last one. @@ -3611,17 +4028,11 @@ StartupXLOG(void) ShmemVariableCache->oidCount = 0; /* - * If it was a shutdown checkpoint, then any following WAL entries - * were created under the next StartUpID; if it was a regular - * checkpoint then any following WAL entries were created under the - * same StartUpID. We must replay WAL entries using the same StartUpID - * they were created under, so temporarily adopt that SUI (see also - * xlog_redo()). + * We must replay WAL entries using the same TimeLineID they were created + * under, so temporarily adopt the TLI indicated by the checkpoint (see + * also xlog_redo()). */ - if (wasShutdown) - ThisStartUpID = checkPoint.ThisStartUpID + 1; - else - ThisStartUpID = checkPoint.ThisStartUpID; + ThisTimeLineID = checkPoint.ThisTimeLineID; RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo; @@ -3663,12 +4074,18 @@ StartupXLOG(void) RmgrTable[rmid].rm_startup(); } - /* Is REDO required ? */ + /* + * Find the first record that logically follows the checkpoint --- + * it might physically precede it, though. + */ if (XLByteLT(checkPoint.redo, RecPtr)) + { + /* back up to find the record */ record = ReadRecord(&(checkPoint.redo), PANIC, buffer); + } else { - /* read past CheckPoint record */ + /* just have to read next record after CheckPoint */ record = ReadRecord(NULL, LOG, buffer); } @@ -3708,6 +4125,7 @@ StartupXLOG(void) */ if (recoveryStopsHere(record, &recoveryApply)) { + needNewTimeLine = true; /* see below */ recoveryContinue = false; if (!recoveryApply) break; @@ -3752,6 +4170,26 @@ StartupXLOG(void) EndOfLog = EndRecPtr; XLByteToPrevSeg(EndOfLog, endLogId, endLogSeg); + /* + * Consider whether we need to assign a new timeline ID. + * + * If we stopped short of the end of WAL during recovery, then we + * are generating a new timeline and must assign it a unique new ID. + * Otherwise, we can just extend the timeline we were in when we + * ran out of WAL. + */ + if (needNewTimeLine) + { + ThisTimeLineID = findNewestTimeLine(recoveryTargetTLI) + 1; + ereport(LOG, + (errmsg("selected new timeline ID: %u", ThisTimeLineID))); + writeTimeLineHistory(ThisTimeLineID, recoveryTargetTLI, + curFileTLI, endLogId, endLogSeg); + } + + /* Save the selected TimeLineID in shared memory, too */ + XLogCtl->ThisTimeLineID = ThisTimeLineID; + /* * We are now done reading the old WAL. Turn off archive fetching * if it was active, and make a writable copy of the last WAL segment. @@ -3759,7 +4197,7 @@ StartupXLOG(void) * readBuf; we will use that below.) */ if (InArchiveRecovery) - exitArchiveRecovery(endLogId, endLogSeg, EndOfLog.xrecoff); + exitArchiveRecovery(curFileTLI, endLogId, endLogSeg); /* * Prepare to write WAL starting at EndOfLog position, and init xlog @@ -3768,7 +4206,7 @@ StartupXLOG(void) */ openLogId = endLogId; openLogSeg = endLogSeg; - openLogFile = XLogFileOpen(openLogId, openLogSeg, false); + openLogFile = XLogFileOpen(openLogId, openLogSeg); openLogOff = 0; ControlFile->logId = openLogId; ControlFile->logSeg = openLogSeg + 1; @@ -3812,9 +4250,8 @@ StartupXLOG(void) * XLogWrite()). * * Note: it might seem we should do AdvanceXLInsertBuffer() here, but - * we can't since we haven't yet determined the correct StartUpID - * to put into the new page's header. The first actual attempt to - * insert a log record will advance the insert state. + * this is sufficient. The first actual attempt to insert a log + * record will advance the insert state. */ XLogCtl->Write.curridx = NextBufIdx(0); } @@ -3860,22 +4297,15 @@ StartupXLOG(void) RmgrTable[rmid].rm_cleanup(); } - /* - * At this point, ThisStartUpID is the largest SUI that we could - * find evidence for in the WAL entries. But check it against - * pg_control's latest checkpoint, to make sure that we can't - * accidentally re-use an already-used SUI. - */ - if (ThisStartUpID < ControlFile->checkPointCopy.ThisStartUpID) - ThisStartUpID = ControlFile->checkPointCopy.ThisStartUpID; - /* * Perform a new checkpoint to update our recovery activity to * disk. * - * Note that we write a shutdown checkpoint. This is correct since - * the records following it will use SUI one more than what is - * shown in the checkpoint's ThisStartUpID. + * Note that we write a shutdown checkpoint rather than an on-line + * one. This is not particularly critical, but since we may be + * assigning a new TLI, using a shutdown checkpoint allows us to + * have the rule that TLI only changes in shutdown checkpoints, + * which allows some extra error checking in xlog_redo. * * In case we had to use the secondary checkpoint, make sure that it * will still be shown as the secondary checkpoint after this @@ -3890,31 +4320,12 @@ StartupXLOG(void) */ XLogCloseRelationCache(); } - else - { - /* - * If we are not doing recovery, then we saw a checkpoint with - * nothing after it, and we can safely use StartUpID equal to one - * more than the checkpoint's SUI. But just for paranoia's sake, - * check against pg_control too. - */ - ThisStartUpID = checkPoint.ThisStartUpID; - if (ThisStartUpID < ControlFile->checkPointCopy.ThisStartUpID) - ThisStartUpID = ControlFile->checkPointCopy.ThisStartUpID; - } /* * Preallocate additional log files, if wanted. */ PreallocXlogFiles(EndOfLog); - /* - * Advance StartUpID to one more than the highest value used - * previously. - */ - ThisStartUpID++; - XLogCtl->ThisStartUpID = ThisStartUpID; - /* * Okay, we're officially UP. */ @@ -4018,18 +4429,18 @@ ReadCheckpointRecord(XLogRecPtr RecPtr, /* * This must be called during startup of a backend process, except that * it need not be called in a standalone backend (which does StartupXLOG - * instead). We need to initialize the local copies of ThisStartUpID and + * instead). We need to initialize the local copies of ThisTimeLineID and * RedoRecPtr. * * Note: before Postgres 7.5, we went to some effort to keep the postmaster - * process's copies of ThisStartUpID and RedoRecPtr valid too. This was + * process's copies of ThisTimeLineID and RedoRecPtr valid too. This was * unnecessary however, since the postmaster itself never touches XLOG anyway. */ void InitXLOGAccess(void) { - /* ThisStartUpID doesn't change so we need no lock to copy it */ - ThisStartUpID = XLogCtl->ThisStartUpID; + /* ThisTimeLineID doesn't change so we need no lock to copy it */ + ThisTimeLineID = XLogCtl->ThisTimeLineID; /* Use GetRedoRecPtr to copy the RedoRecPtr safely */ (void) GetRedoRecPtr(); } @@ -4110,7 +4521,7 @@ CreateCheckPoint(bool shutdown, bool force) } MemSet(&checkPoint, 0, sizeof(checkPoint)); - checkPoint.ThisStartUpID = ThisStartUpID; + checkPoint.ThisTimeLineID = ThisTimeLineID; checkPoint.time = time(NULL); LWLockAcquire(WALInsertLock, LW_EXCLUSIVE); @@ -4372,8 +4783,20 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) ShmemVariableCache->nextXid = checkPoint.nextXid; ShmemVariableCache->nextOid = checkPoint.nextOid; ShmemVariableCache->oidCount = 0; - /* Any later WAL records should be run with shutdown SUI plus 1 */ - ThisStartUpID = checkPoint.ThisStartUpID + 1; + /* + * TLI may change in a shutdown checkpoint, but it shouldn't decrease + */ + if (checkPoint.ThisTimeLineID != ThisTimeLineID) + { + if (checkPoint.ThisTimeLineID < ThisTimeLineID || + !list_member_int(expectedTLIs, + (int) checkPoint.ThisTimeLineID)) + ereport(PANIC, + (errmsg("unexpected timeline ID %u (after %u) in checkpoint record", + checkPoint.ThisTimeLineID, ThisTimeLineID))); + /* Following WAL records should be run with new TLI */ + ThisTimeLineID = checkPoint.ThisTimeLineID; + } } else if (info == XLOG_CHECKPOINT_ONLINE) { @@ -4389,40 +4812,11 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) ShmemVariableCache->nextOid = checkPoint.nextOid; ShmemVariableCache->oidCount = 0; } - /* Any later WAL records should be run with the then-active SUI */ - ThisStartUpID = checkPoint.ThisStartUpID; - } - else if (info == XLOG_FILE_HEADER) - { - XLogFileHeaderData fhdr; - - memcpy(&fhdr, XLogRecGetData(record), sizeof(XLogFileHeaderData)); - if (fhdr.xlfhd_sysid != ControlFile->system_identifier) - { - char fhdrident_str[32]; - char sysident_str[32]; - - /* - * Format sysids separately to keep platform-dependent format - * code out of the translatable message string. - */ - snprintf(fhdrident_str, sizeof(fhdrident_str), UINT64_FORMAT, - fhdr.xlfhd_sysid); - snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT, - ControlFile->system_identifier); - ereport(PANIC, - (errmsg("WAL file is from different system"), - errdetail("WAL file SYSID is %s, pg_control SYSID is %s", - fhdrident_str, sysident_str))); - } - if (fhdr.xlfhd_seg_size != XLogSegSize) + /* TLI should not change in an on-line checkpoint */ + if (checkPoint.ThisTimeLineID != ThisTimeLineID) ereport(PANIC, - (errmsg("WAL file is from different system"), - errdetail("Incorrect XLOG_SEG_SIZE in file header."))); - } - else if (info == XLOG_WASTED_SPACE) - { - /* ignore */ + (errmsg("unexpected timeline ID %u (should be %u) in checkpoint record", + checkPoint.ThisTimeLineID, ThisTimeLineID))); } } @@ -4442,10 +4836,10 @@ xlog_desc(char *buf, uint8 xl_info, char *rec) CheckPoint *checkpoint = (CheckPoint *) rec; sprintf(buf + strlen(buf), "checkpoint: redo %X/%X; undo %X/%X; " - "sui %u; xid %u; oid %u; %s", + "tli %u; xid %u; oid %u; %s", checkpoint->redo.xlogid, checkpoint->redo.xrecoff, checkpoint->undo.xlogid, checkpoint->undo.xrecoff, - checkpoint->ThisStartUpID, checkpoint->nextXid, + checkpoint->ThisTimeLineID, checkpoint->nextXid, checkpoint->nextOid, (info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online"); } @@ -4456,22 +4850,6 @@ xlog_desc(char *buf, uint8 xl_info, char *rec) memcpy(&nextOid, rec, sizeof(Oid)); sprintf(buf + strlen(buf), "nextOid: %u", nextOid); } - else if (info == XLOG_FILE_HEADER) - { - XLogFileHeaderData *fhdr = (XLogFileHeaderData *) rec; - - sprintf(buf + strlen(buf), - "file header: sysid " UINT64_FORMAT "; " - "xlogid %X segno %X; seg_size %X", - fhdr->xlfhd_sysid, - fhdr->xlfhd_xlogid, - fhdr->xlfhd_segno, - fhdr->xlfhd_seg_size); - } - else if (info == XLOG_WASTED_SPACE) - { - strcat(buf, "wasted space"); - } else strcat(buf, "UNKNOWN"); } diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index a58902e76f..8387844a77 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -2,169 +2,31 @@ * * xlogutils.c * + * PostgreSQL transaction log manager utility routines + * + * This file contains support routines that are used by XLOG replay functions. + * None of this code is used during normal system operation. + * * * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.31 2004/06/18 06:13:15 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.32 2004/07/21 22:31:20 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" -#include "access/htup.h" #include "access/xlogutils.h" -#include "catalog/pg_database.h" -#include "storage/bufpage.h" +#include "storage/bufmgr.h" #include "storage/smgr.h" #include "utils/hsearch.h" -#include "utils/relcache.h" - - -/* - * --------------------------------------------------------------- - * - * Index support functions - * - *---------------------------------------------------------------- - */ - -/* - * Check if specified heap tuple was inserted by given - * xaction/command and return - * - * - -1 if not - * - 0 if there is no tuple at all - * - 1 if yes - */ -int -XLogIsOwnerOfTuple(RelFileNode hnode, ItemPointer iptr, - TransactionId xid, CommandId cid) -{ - Relation reln; - Buffer buffer; - Page page; - ItemId lp; - HeapTupleHeader htup; - - reln = XLogOpenRelation(false, RM_HEAP_ID, hnode); - if (!RelationIsValid(reln)) - return (0); - - buffer = ReadBuffer(reln, ItemPointerGetBlockNumber(iptr)); - if (!BufferIsValid(buffer)) - return (0); - - LockBuffer(buffer, BUFFER_LOCK_SHARE); - page = (Page) BufferGetPage(buffer); - if (PageIsNew((PageHeader) page) || - ItemPointerGetOffsetNumber(iptr) > PageGetMaxOffsetNumber(page)) - { - LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - ReleaseBuffer(buffer); - return (0); - } - lp = PageGetItemId(page, ItemPointerGetOffsetNumber(iptr)); - if (!ItemIdIsUsed(lp) || ItemIdDeleted(lp)) - { - LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - ReleaseBuffer(buffer); - return (0); - } - htup = (HeapTupleHeader) PageGetItem(page, lp); - - Assert(PageGetSUI(page) == ThisStartUpID); - if (!TransactionIdEquals(HeapTupleHeaderGetXmin(htup), xid) || - HeapTupleHeaderGetCmin(htup) != cid) - { - LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - ReleaseBuffer(buffer); - return (-1); - } - - LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - ReleaseBuffer(buffer); - return (1); -} /* - * MUST BE CALLED ONLY ON RECOVERY. - * - * Check if exists valid (inserted by not aborted xaction) heap tuple - * for given item pointer - */ -bool -XLogIsValidTuple(RelFileNode hnode, ItemPointer iptr) -{ - Relation reln; - Buffer buffer; - Page page; - ItemId lp; - HeapTupleHeader htup; - - reln = XLogOpenRelation(false, RM_HEAP_ID, hnode); - if (!RelationIsValid(reln)) - return (false); - - buffer = ReadBuffer(reln, ItemPointerGetBlockNumber(iptr)); - if (!BufferIsValid(buffer)) - return (false); - - LockBuffer(buffer, BUFFER_LOCK_SHARE); - page = (Page) BufferGetPage(buffer); - if (PageIsNew((PageHeader) page) || - ItemPointerGetOffsetNumber(iptr) > PageGetMaxOffsetNumber(page)) - { - LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - ReleaseBuffer(buffer); - return (false); - } - - if (PageGetSUI(page) != ThisStartUpID) - { - Assert(PageGetSUI(page) < ThisStartUpID); - LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - ReleaseBuffer(buffer); - return (true); - } - - lp = PageGetItemId(page, ItemPointerGetOffsetNumber(iptr)); - if (!ItemIdIsUsed(lp) || ItemIdDeleted(lp)) - { - LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - ReleaseBuffer(buffer); - return (false); - } - - htup = (HeapTupleHeader) PageGetItem(page, lp); - - /* MUST CHECK WASN'T TUPLE INSERTED IN PREV STARTUP */ - - if (!(htup->t_infomask & HEAP_XMIN_COMMITTED)) - { - if (htup->t_infomask & HEAP_XMIN_INVALID || - (htup->t_infomask & HEAP_MOVED_IN && - TransactionIdDidAbort(HeapTupleHeaderGetXvac(htup))) || - TransactionIdDidAbort(HeapTupleHeaderGetXmin(htup))) - { - LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - ReleaseBuffer(buffer); - return (false); - } - } - - LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - ReleaseBuffer(buffer); - return (true); -} - -/* - * --------------------------------------------------------------- * * Storage related support functions * - *---------------------------------------------------------------- */ Buffer @@ -198,8 +60,10 @@ XLogReadBuffer(bool extend, Relation reln, BlockNumber blkno) return (buffer); } + /* - * "Relation" cache + * Lightweight "Relation" cache --- this substitutes for the normal relcache + * during XLOG replay. */ typedef struct XLogRelDesc diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index dc66314afe..778c1c9521 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.113 2004/07/12 05:37:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.114 2004/07/21 22:31:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -279,7 +279,7 @@ DefineSequence(CreateSeqStmt *seq) recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata); PageSetLSN(page, recptr); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); } END_CRIT_SECTION(); @@ -354,7 +354,7 @@ AlterSequence(AlterSeqStmt *stmt) recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata); PageSetLSN(page, recptr); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); } END_CRIT_SECTION(); @@ -553,7 +553,7 @@ nextval(PG_FUNCTION_ARGS) recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata); PageSetLSN(page, recptr); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); } /* update on-disk data */ @@ -689,7 +689,7 @@ do_setval(RangeVar *sequence, int64 next, bool iscalled) recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata); PageSetLSN(page, recptr); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); } /* save info in sequence relation */ @@ -1091,7 +1091,7 @@ seq_redo(XLogRecPtr lsn, XLogRecord *record) elog(PANIC, "seq_redo: failed to add item to page"); PageSetLSN(page, lsn); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); WriteBuffer(buffer); } diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 241f2550f6..e78db91e77 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.121 2004/07/19 02:47:06 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.122 2004/07/21 22:31:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -5448,7 +5448,7 @@ copy_relation_data(Relation rel, SMgrRelation dst) recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata); PageSetLSN(page, recptr); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); END_CRIT_SECTION(); } diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index a420865b38..dfc03ea461 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -13,7 +13,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.283 2004/07/20 22:56:29 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.284 2004/07/21 22:31:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2341,7 +2341,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel, recptr = log_heap_clean(onerel, buf, unused, uncnt); PageSetLSN(page, recptr); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); } else { @@ -2491,10 +2491,10 @@ move_chain_tuple(Relation rel, if (old_buf != dst_buf) { PageSetLSN(old_page, recptr); - PageSetSUI(old_page, ThisStartUpID); + PageSetTLI(old_page, ThisTimeLineID); } PageSetLSN(dst_page, recptr); - PageSetSUI(dst_page, ThisStartUpID); + PageSetTLI(dst_page, ThisTimeLineID); } else { @@ -2611,9 +2611,9 @@ move_plain_tuple(Relation rel, dst_buf, &newtup); PageSetLSN(old_page, recptr); - PageSetSUI(old_page, ThisStartUpID); + PageSetTLI(old_page, ThisTimeLineID); PageSetLSN(dst_page, recptr); - PageSetSUI(dst_page, ThisStartUpID); + PageSetTLI(dst_page, ThisTimeLineID); } else { @@ -2807,7 +2807,7 @@ vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage) recptr = log_heap_clean(onerel, buffer, unused, uncnt); PageSetLSN(page, recptr); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); } else { diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index 2e4b263b1a..e09db0426b 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -31,7 +31,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.42 2004/06/05 19:48:07 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.43 2004/07/21 22:31:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -532,7 +532,7 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, recptr = log_heap_clean(onerel, buffer, unused, uncnt); PageSetLSN(page, recptr); - PageSetSUI(page, ThisStartUpID); + PageSetTLI(page, ThisTimeLineID); } else { diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c index c9595094f1..d42c43195d 100644 --- a/src/backend/postmaster/pgarch.c +++ b/src/backend/postmaster/pgarch.c @@ -19,7 +19,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/postmaster/pgarch.c,v 1.1 2004/07/19 02:47:08 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/postmaster/pgarch.c,v 1.2 2004/07/21 22:31:22 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -31,9 +31,10 @@ #include #include -#include "postmaster/pgarch.h" +#include "access/xlog_internal.h" #include "libpq/pqsignal.h" #include "miscadmin.h" +#include "postmaster/pgarch.h" #include "postmaster/postmaster.h" #include "storage/fd.h" #include "storage/ipc.h" @@ -63,8 +64,8 @@ * ---------- */ #define MIN_XFN_CHARS 16 -#define MAX_XFN_CHARS 16 -#define VALID_XFN_CHARS "0123456789ABCDEF" +#define MAX_XFN_CHARS 24 +#define VALID_XFN_CHARS "0123456789ABCDEF.history" #define NUM_ARCHIVE_RETRIES 3 @@ -73,8 +74,6 @@ * Local data * ---------- */ -static char XLogDir[MAXPGPATH]; -static char XLogArchiveStatusDir[MAXPGPATH]; static time_t last_pgarch_start_time; /* @@ -265,9 +264,8 @@ PgArchiverMain(int argc, char *argv[]) init_ps_display("archiver process", "", ""); set_ps_display(""); - /* Init XLOG file paths */ - snprintf(XLogDir, MAXPGPATH, "%s/pg_xlog", DataDir); - snprintf(XLogArchiveStatusDir, MAXPGPATH, "%s/archive_status", XLogDir); + /* Init XLOG file paths --- needed in EXEC_BACKEND case */ + XLOGPathInit(); pgarch_MainLoop(); @@ -497,6 +495,12 @@ pgarch_archiveXlog(char *xlog) * 1) to maintain the sequential chain of xlogs required for recovery * 2) because the oldest ones will sooner become candidates for * recycling at time of checkpoint + * + * NOTE: the "oldest" comparison will presently consider all segments of + * a timeline with a smaller ID to be older than all segments of a timeline + * with a larger ID; the net result being that past timelines are given + * higher priority for archiving. This seems okay, or at least not + * obviously worth changing. */ static bool pgarch_readyXlog(char *xlog) @@ -507,11 +511,13 @@ pgarch_readyXlog(char *xlog) * It is possible to optimise this code, though only a single * file is expected on the vast majority of calls, so.... */ + char XLogArchiveStatusDir[MAXPGPATH]; char newxlog[MAX_XFN_CHARS + 6 + 1]; DIR *rldir; struct dirent *rlde; bool found = false; + snprintf(XLogArchiveStatusDir, MAXPGPATH, "%s/archive_status", XLogDir); rldir = AllocateDir(XLogArchiveStatusDir); if (rldir == NULL) ereport(ERROR, @@ -575,14 +581,12 @@ pgarch_archiveDone(char *xlog) { char rlogready[MAXPGPATH]; char rlogdone[MAXPGPATH]; - int rc; - snprintf(rlogready, MAXPGPATH, "%s/%s.ready", XLogArchiveStatusDir, xlog); - snprintf(rlogdone, MAXPGPATH, "%s/%s.done", XLogArchiveStatusDir, xlog); - rc = rename(rlogready, rlogdone); - if (rc < 0) + StatusFilePath(rlogready, xlog, ".ready"); + StatusFilePath(rlogdone, xlog, ".done"); + if (rename(rlogready, rlogdone) < 0) ereport(WARNING, (errcode_for_file_access(), - errmsg("could not rename \"%s\": %m", - rlogready))); + errmsg("could not rename \"%s\" to \"%s\": %m", + rlogready, rlogdone))); } diff --git a/src/backend/storage/lmgr/deadlock.c b/src/backend/storage/lmgr/deadlock.c index 4170221b22..edfdbcbee7 100644 --- a/src/backend/storage/lmgr/deadlock.c +++ b/src/backend/storage/lmgr/deadlock.c @@ -12,7 +12,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/lmgr/deadlock.c,v 1.28 2004/06/05 19:48:08 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/lmgr/deadlock.c,v 1.29 2004/07/21 22:31:22 tgl Exp $ * * Interface: * @@ -25,6 +25,7 @@ */ #include "postgres.h" +#include "catalog/pg_class.h" #include "lib/stringinfo.h" #include "miscadmin.h" #include "storage/proc.h" diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index 91abe8cfa7..53054979a9 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ b/src/bin/pg_controldata/pg_controldata.c @@ -6,7 +6,7 @@ * copyright (c) Oliver Elphick , 2001; * licence: BSD * - * $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.17 2004/06/03 00:07:36 momjian Exp $ + * $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.18 2004/07/21 22:31:23 tgl Exp $ */ #include "postgres.h" @@ -165,7 +165,7 @@ main(int argc, char *argv[]) ControlFile.checkPointCopy.redo.xlogid, ControlFile.checkPointCopy.redo.xrecoff); printf(_("Latest checkpoint's UNDO location: %X/%X\n"), ControlFile.checkPointCopy.undo.xlogid, ControlFile.checkPointCopy.undo.xrecoff); - printf(_("Latest checkpoint's StartUpID: %u\n"), ControlFile.checkPointCopy.ThisStartUpID); + printf(_("Latest checkpoint's TimeLineID: %u\n"), ControlFile.checkPointCopy.ThisTimeLineID); printf(_("Latest checkpoint's NextXID: %u\n"), ControlFile.checkPointCopy.nextXid); printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile.checkPointCopy.nextOid); printf(_("Time of latest checkpoint: %s\n"), ckpttime_str); diff --git a/src/bin/pg_resetxlog/pg_resetxlog.c b/src/bin/pg_resetxlog/pg_resetxlog.c index 41b81ea894..b8caf3cb7b 100644 --- a/src/bin/pg_resetxlog/pg_resetxlog.c +++ b/src/bin/pg_resetxlog/pg_resetxlog.c @@ -23,22 +23,22 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.20 2004/06/03 00:07:37 momjian Exp $ + * $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.21 2004/07/21 22:31:24 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" -#include -#include -#include -#include -#include -#include #include +#include #include +#include +#include +#include +#include #include "access/xlog.h" +#include "access/xlog_internal.h" #include "catalog/catversion.h" #include "catalog/pg_control.h" @@ -48,27 +48,7 @@ extern char *optarg; #define _(x) gettext((x)) -/******************** stuff copied from xlog.c ********************/ - -/* Increment an xlogid/segment pair */ -#define NextLogSeg(logId, logSeg) \ - do { \ - if ((logSeg) >= XLogSegsPerFile-1) \ - { \ - (logId)++; \ - (logSeg) = 0; \ - } \ - else \ - (logSeg)++; \ - } while (0) - -#define XLogFileName(path, log, seg) \ - snprintf(path, MAXPGPATH, "%s/%08X%08X", \ - XLogDir, log, seg) - -/******************** end of stuff copied from xlog.c ********************/ - -static char XLogDir[MAXPGPATH]; +char XLogDir[MAXPGPATH]; /* not static, see xlog_internal.h */ static char ControlFilePath[MAXPGPATH]; static ControlFileData ControlFile; /* pg_control values */ @@ -388,9 +368,9 @@ GuessControlValues(void) ControlFile.system_identifier = sysidentifier; ControlFile.checkPointCopy.redo.xlogid = 0; - ControlFile.checkPointCopy.redo.xrecoff = SizeOfXLogPHD + SizeOfXLogRecord + SizeOfXLogFHD; + ControlFile.checkPointCopy.redo.xrecoff = SizeOfXLogLongPHD; ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo; - ControlFile.checkPointCopy.ThisStartUpID = 0; + ControlFile.checkPointCopy.ThisTimeLineID = 1; ControlFile.checkPointCopy.nextXid = (TransactionId) 514; /* XXX */ ControlFile.checkPointCopy.nextOid = BootstrapObjectIdData; ControlFile.checkPointCopy.time = time(NULL); @@ -430,7 +410,7 @@ GuessControlValues(void) /* * XXX eventually, should try to grovel through old XLOG to develop - * more accurate values for startupid, nextXID, and nextOID. + * more accurate values for TimeLineID, nextXID, and nextOID. */ } @@ -463,7 +443,7 @@ PrintControlValues(bool guessed) printf(_("Database system identifier: %s\n"), sysident_str); printf(_("Current log file ID: %u\n"), ControlFile.logId); printf(_("Next log file segment: %u\n"), ControlFile.logSeg); - printf(_("Latest checkpoint's StartUpID: %u\n"), ControlFile.checkPointCopy.ThisStartUpID); + printf(_("Latest checkpoint's TimeLineID: %u\n"), ControlFile.checkPointCopy.ThisTimeLineID); printf(_("Latest checkpoint's NextXID: %u\n"), ControlFile.checkPointCopy.nextXid); printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile.checkPointCopy.nextOid); printf(_("Database block size: %u\n"), ControlFile.blcksz); @@ -506,7 +486,7 @@ RewriteControlFile(void) ControlFile.checkPointCopy.redo.xlogid = newXlogId; ControlFile.checkPointCopy.redo.xrecoff = - newXlogSeg * XLogSegSize + SizeOfXLogPHD + SizeOfXLogRecord + SizeOfXLogFHD; + newXlogSeg * XLogSegSize + SizeOfXLogLongPHD; ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo; ControlFile.checkPointCopy.time = time(NULL); @@ -634,8 +614,8 @@ WriteEmptyXLOG(void) { char *buffer; XLogPageHeader page; + XLogLongPageHeader longpage; XLogRecord *record; - XLogFileHeaderData *fhdr; crc64 crc; char path[MAXPGPATH]; int fd; @@ -648,44 +628,23 @@ WriteEmptyXLOG(void) /* Set up the XLOG page header */ page->xlp_magic = XLOG_PAGE_MAGIC; - page->xlp_info = 0; - page->xlp_sui = ControlFile.checkPointCopy.ThisStartUpID; + page->xlp_info = XLP_LONG_HEADER; + page->xlp_tli = ControlFile.checkPointCopy.ThisTimeLineID; page->xlp_pageaddr.xlogid = ControlFile.checkPointCopy.redo.xlogid; page->xlp_pageaddr.xrecoff = - ControlFile.checkPointCopy.redo.xrecoff - - (SizeOfXLogPHD + SizeOfXLogRecord + SizeOfXLogFHD); + ControlFile.checkPointCopy.redo.xrecoff - SizeOfXLogLongPHD; + longpage = (XLogLongPageHeader) page; + longpage->xlp_sysid = ControlFile.system_identifier; + longpage->xlp_seg_size = XLogSegSize; - /* Insert the file header record */ - record = (XLogRecord *) ((char *) page + SizeOfXLogPHD); + /* Insert the initial checkpoint record */ + record = (XLogRecord *) ((char *) page + SizeOfXLogLongPHD); record->xl_prev.xlogid = 0; record->xl_prev.xrecoff = 0; record->xl_xact_prev.xlogid = 0; record->xl_xact_prev.xrecoff = 0; record->xl_xid = InvalidTransactionId; - record->xl_len = SizeOfXLogFHD; - record->xl_info = XLOG_FILE_HEADER; - record->xl_rmid = RM_XLOG_ID; - fhdr = (XLogFileHeaderData *) XLogRecGetData(record); - fhdr->xlfhd_sysid = ControlFile.system_identifier; - fhdr->xlfhd_xlogid = page->xlp_pageaddr.xlogid; - fhdr->xlfhd_segno = page->xlp_pageaddr.xrecoff / XLogSegSize; - fhdr->xlfhd_seg_size = XLogSegSize; - - INIT_CRC64(crc); - COMP_CRC64(crc, fhdr, SizeOfXLogFHD); - COMP_CRC64(crc, (char *) record + sizeof(crc64), - SizeOfXLogRecord - sizeof(crc64)); - FIN_CRC64(crc); - record->xl_crc = crc; - - /* Insert the initial checkpoint record */ - record = (XLogRecord *) ((char *) page + SizeOfXLogPHD + SizeOfXLogRecord + SizeOfXLogFHD); - record->xl_prev.xlogid = page->xlp_pageaddr.xlogid; - record->xl_prev.xrecoff = page->xlp_pageaddr.xrecoff + SizeOfXLogPHD; - record->xl_xact_prev.xlogid = 0; - record->xl_xact_prev.xrecoff = 0; - record->xl_xid = InvalidTransactionId; record->xl_len = sizeof(CheckPoint); record->xl_info = XLOG_CHECKPOINT_SHUTDOWN; record->xl_rmid = RM_XLOG_ID; @@ -700,7 +659,8 @@ WriteEmptyXLOG(void) record->xl_crc = crc; /* Write the first page */ - XLogFileName(path, newXlogId, newXlogSeg); + XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID, + newXlogId, newXlogSeg); unlink(path); diff --git a/src/include/access/xact.h b/src/include/access/xact.h index 714518d308..458b3012ad 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.65 2004/07/17 03:30:38 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.66 2004/07/21 22:31:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -15,6 +15,7 @@ #define XACT_H #include "access/xlog.h" +#include "storage/relfilenode.h" #include "utils/nabstime.h" diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 630a62d77a..f2542d6fc7 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.53 2004/07/19 02:47:13 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.54 2004/07/21 22:31:25 tgl Exp $ */ #ifndef XLOG_H #define XLOG_H @@ -14,7 +14,7 @@ #include "access/rmgr.h" #include "access/transam.h" #include "access/xlogdefs.h" -#include "storage/bufmgr.h" +#include "storage/buf.h" #include "utils/pg_crc.h" @@ -76,107 +76,6 @@ typedef struct XLogRecord #define XLOG_NO_TRAN XLR_INFO_MASK /* - * Header info for a backup block appended to an XLOG record. - * - * Note that the backup block has its own CRC, and is not covered by - * the CRC of the XLOG record proper. Also note that we don't attempt - * to align either the BkpBlock struct or the block's data. - */ -typedef struct BkpBlock -{ - crc64 crc; - RelFileNode node; - BlockNumber block; -} BkpBlock; - -/* - * When there is not enough space on current page for whole record, we - * continue on the next page with continuation record. (However, the - * XLogRecord header will never be split across pages; if there's less than - * SizeOfXLogRecord space left at the end of a page, we just waste it.) - * - * Note that xl_rem_len includes backup-block data, unlike xl_len in the - * initial header. - */ -typedef struct XLogContRecord -{ - uint32 xl_rem_len; /* total len of remaining data for record */ - - /* ACTUAL LOG DATA FOLLOWS AT END OF STRUCT */ - -} XLogContRecord; - -#define SizeOfXLogContRecord MAXALIGN(sizeof(XLogContRecord)) - -/* - * Each page of XLOG file has a header like this: - */ -#define XLOG_PAGE_MAGIC 0xD05B /* can be used as WAL version indicator */ - -typedef struct XLogPageHeaderData -{ - uint16 xlp_magic; /* magic value for correctness checks */ - uint16 xlp_info; /* flag bits, see below */ - StartUpID xlp_sui; /* StartUpID of first record on page */ - XLogRecPtr xlp_pageaddr; /* XLOG address of this page */ -} XLogPageHeaderData; - -#define SizeOfXLogPHD MAXALIGN(sizeof(XLogPageHeaderData)) - -typedef XLogPageHeaderData *XLogPageHeader; - -/* When record crosses page boundary, set this flag in new page's header */ -#define XLP_FIRST_IS_CONTRECORD 0x0001 -/* All defined flag bits in xlp_info (used for validity checking of header) */ -#define XLP_ALL_FLAGS 0x0001 - -/* - * We break each logical log file (xlogid value) into segment files of the - * size indicated by XLOG_SEG_SIZE. One possible segment at the end of each - * log file is wasted, to ensure that we don't have problems representing - * last-byte-position-plus-1. - */ -#define XLogSegSize ((uint32) XLOG_SEG_SIZE) -#define XLogSegsPerFile (((uint32) 0xffffffff) / XLogSegSize) -#define XLogFileSize (XLogSegsPerFile * XLogSegSize) - -/* - * The first XLOG record in each segment file is always an XLOG_FILE_HEADER - * record. This record does nothing as far as XLOG replay is concerned, - * but it is useful for verifying that we haven't mixed up XLOG segment files. - * The body of an XLOG_FILE_HEADER record is a struct XLogFileHeaderData. - * Note: the xlogid/segno fields are really redundant with xlp_pageaddr in - * the page header, but we store them anyway as an extra check. - */ -typedef struct XLogFileHeaderData -{ - uint64 xlfhd_sysid; /* system identifier from pg_control */ - uint32 xlfhd_xlogid; /* logical log file # */ - uint32 xlfhd_segno; /* segment number within logical log file */ - uint32 xlfhd_seg_size; /* just as a cross-check */ -} XLogFileHeaderData; - -#define SizeOfXLogFHD MAXALIGN(sizeof(XLogFileHeaderData)) - - -/* - * Method table for resource managers. - * - * RmgrTable[] is indexed by RmgrId values (see rmgr.h). - */ -typedef struct RmgrData -{ - const char *rm_name; - void (*rm_redo) (XLogRecPtr lsn, XLogRecord *rptr); - void (*rm_undo) (XLogRecPtr lsn, XLogRecord *rptr); - void (*rm_desc) (char *buf, uint8 xl_info, char *rec); - void (*rm_startup) (void); - void (*rm_cleanup) (void); -} RmgrData; - -extern RmgrData RmgrTable[]; - -/*-------------------- * List of these structs is used to pass data to XLogInsert(). * * If buffer is valid then XLOG will check if buffer must be backed up @@ -188,7 +87,6 @@ extern RmgrData RmgrTable[]; * the XLOG record, since we assume it's present in the buffer. Therefore, * rmgr redo routines MUST pay attention to XLR_BKP_BLOCK_X to know what * is actually stored in the XLOG record. - *-------------------- */ typedef struct XLogRecData { @@ -198,7 +96,7 @@ typedef struct XLogRecData struct XLogRecData *next; } XLogRecData; -extern StartUpID ThisStartUpID; /* current SUI */ +extern TimeLineID ThisTimeLineID; /* current TLI */ extern bool InRecovery; extern XLogRecPtr MyLastRecPtr; extern bool MyXactMadeXLogEntry; diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h new file mode 100644 index 0000000000..09877bf64b --- /dev/null +++ b/src/include/access/xlog_internal.h @@ -0,0 +1,224 @@ +/* + * xlog_internal.h + * + * PostgreSQL transaction log internal declarations + * + * NOTE: this file is intended to contain declarations useful for + * manipulating the XLOG files directly, but it is not supposed to be + * needed by rmgr routines (redo/undo support for individual record types). + * So the XLogRecord typedef and associated stuff appear in xlog.h. + * + * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $PostgreSQL: pgsql/src/include/access/xlog_internal.h,v 1.1 2004/07/21 22:31:25 tgl Exp $ + */ +#ifndef XLOG_INTERNAL_H +#define XLOG_INTERNAL_H + +#include "access/xlog.h" +#include "storage/block.h" +#include "storage/relfilenode.h" + + +/* + * Header info for a backup block appended to an XLOG record. + * + * Note that the backup block has its own CRC, and is not covered by + * the CRC of the XLOG record proper. Also note that we don't attempt + * to align either the BkpBlock struct or the block's data. + */ +typedef struct BkpBlock +{ + crc64 crc; + RelFileNode node; + BlockNumber block; +} BkpBlock; + +/* + * When there is not enough space on current page for whole record, we + * continue on the next page with continuation record. (However, the + * XLogRecord header will never be split across pages; if there's less than + * SizeOfXLogRecord space left at the end of a page, we just waste it.) + * + * Note that xl_rem_len includes backup-block data, unlike xl_len in the + * initial header. + */ +typedef struct XLogContRecord +{ + uint32 xl_rem_len; /* total len of remaining data for record */ + + /* ACTUAL LOG DATA FOLLOWS AT END OF STRUCT */ + +} XLogContRecord; + +#define SizeOfXLogContRecord MAXALIGN(sizeof(XLogContRecord)) + +/* + * Each page of XLOG file has a header like this: + */ +#define XLOG_PAGE_MAGIC 0xD05B /* can be used as WAL version indicator */ + +typedef struct XLogPageHeaderData +{ + uint16 xlp_magic; /* magic value for correctness checks */ + uint16 xlp_info; /* flag bits, see below */ + TimeLineID xlp_tli; /* TimeLineID of first record on page */ + XLogRecPtr xlp_pageaddr; /* XLOG address of this page */ +} XLogPageHeaderData; + +#define SizeOfXLogShortPHD MAXALIGN(sizeof(XLogPageHeaderData)) + +typedef XLogPageHeaderData *XLogPageHeader; + +/* + * When the XLP_LONG_HEADER flag is set, we store additional fields in the + * page header. (This is ordinarily done just in the first page of an + * XLOG file.) The additional fields serve to identify the file accurately. + */ +typedef struct XLogLongPageHeaderData +{ + XLogPageHeaderData std; /* standard header fields */ + uint64 xlp_sysid; /* system identifier from pg_control */ + uint32 xlp_seg_size; /* just as a cross-check */ +} XLogLongPageHeaderData; + +#define SizeOfXLogLongPHD MAXALIGN(sizeof(XLogLongPageHeaderData)) + +typedef XLogLongPageHeaderData *XLogLongPageHeader; + +/* When record crosses page boundary, set this flag in new page's header */ +#define XLP_FIRST_IS_CONTRECORD 0x0001 +/* This flag indicates a "long" page header */ +#define XLP_LONG_HEADER 0x0002 +/* All defined flag bits in xlp_info (used for validity checking of header) */ +#define XLP_ALL_FLAGS 0x0003 + +#define XLogPageHeaderSize(hdr) \ + (((hdr)->xlp_info & XLP_LONG_HEADER) ? SizeOfXLogLongPHD : SizeOfXLogShortPHD) + +/* + * We break each logical log file (xlogid value) into segment files of the + * size indicated by XLOG_SEG_SIZE. One possible segment at the end of each + * log file is wasted, to ensure that we don't have problems representing + * last-byte-position-plus-1. + */ +#define XLogSegSize ((uint32) XLOG_SEG_SIZE) +#define XLogSegsPerFile (((uint32) 0xffffffff) / XLogSegSize) +#define XLogFileSize (XLogSegsPerFile * XLogSegSize) + + +/* + * Macros for manipulating XLOG pointers + */ + +/* Increment an xlogid/segment pair */ +#define NextLogSeg(logId, logSeg) \ + do { \ + if ((logSeg) >= XLogSegsPerFile-1) \ + { \ + (logId)++; \ + (logSeg) = 0; \ + } \ + else \ + (logSeg)++; \ + } while (0) + +/* Decrement an xlogid/segment pair (assume it's not 0,0) */ +#define PrevLogSeg(logId, logSeg) \ + do { \ + if (logSeg) \ + (logSeg)--; \ + else \ + { \ + (logId)--; \ + (logSeg) = XLogSegsPerFile-1; \ + } \ + } while (0) + +/* + * Compute ID and segment from an XLogRecPtr. + * + * For XLByteToSeg, do the computation at face value. For XLByteToPrevSeg, + * a boundary byte is taken to be in the previous segment. This is suitable + * for deciding which segment to write given a pointer to a record end, + * for example. (We can assume xrecoff is not zero, since no valid recptr + * can have that.) + */ +#define XLByteToSeg(xlrp, logId, logSeg) \ + ( logId = (xlrp).xlogid, \ + logSeg = (xlrp).xrecoff / XLogSegSize \ + ) +#define XLByteToPrevSeg(xlrp, logId, logSeg) \ + ( logId = (xlrp).xlogid, \ + logSeg = ((xlrp).xrecoff - 1) / XLogSegSize \ + ) + +/* + * Is an XLogRecPtr within a particular XLOG segment? + * + * For XLByteInSeg, do the computation at face value. For XLByteInPrevSeg, + * a boundary byte is taken to be in the previous segment. + */ +#define XLByteInSeg(xlrp, logId, logSeg) \ + ((xlrp).xlogid == (logId) && \ + (xlrp).xrecoff / XLogSegSize == (logSeg)) + +#define XLByteInPrevSeg(xlrp, logId, logSeg) \ + ((xlrp).xlogid == (logId) && \ + ((xlrp).xrecoff - 1) / XLogSegSize == (logSeg)) + +/* Check if an xrecoff value is in a plausible range */ +#define XRecOffIsValid(xrecoff) \ + ((xrecoff) % BLCKSZ >= SizeOfXLogShortPHD && \ + (BLCKSZ - (xrecoff) % BLCKSZ) >= SizeOfXLogRecord) + +/* + * These macros encapsulate knowledge about the exact layout of XLog file + * names, timeline history file names, and archive-status file names. + */ +#define MAXFNAMELEN 32 + +#define XLogFileName(fname, tli, log, seg) \ + snprintf(fname, MAXFNAMELEN, "%08X%08X%08X", tli, log, seg) + +#define XLogFilePath(path, tli, log, seg) \ + snprintf(path, MAXPGPATH, "%s/%08X%08X%08X", XLogDir, tli, log, seg) + +#define TLHistoryFileName(fname, tli) \ + snprintf(fname, MAXFNAMELEN, "%08X.history", tli) + +#define TLHistoryFilePath(path, tli) \ + snprintf(path, MAXPGPATH, "%s/%08X.history", XLogDir, tli) + +#define StatusFilePath(path, xlog, suffix) \ + snprintf(path, MAXPGPATH, "%s/archive_status/%s%s", XLogDir, xlog, suffix) + +extern char XLogDir[MAXPGPATH]; + +/* + * _INTL_MAXLOGRECSZ: max space needed for a record including header and + * any backup-block data. + */ +#define _INTL_MAXLOGRECSZ (SizeOfXLogRecord + MAXLOGRECSZ + \ + XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ)) + + +/* + * Method table for resource managers. + * + * RmgrTable[] is indexed by RmgrId values (see rmgr.h). + */ +typedef struct RmgrData +{ + const char *rm_name; + void (*rm_redo) (XLogRecPtr lsn, XLogRecord *rptr); + void (*rm_undo) (XLogRecPtr lsn, XLogRecord *rptr); + void (*rm_desc) (char *buf, uint8 xl_info, char *rec); + void (*rm_startup) (void); + void (*rm_cleanup) (void); +} RmgrData; + +extern const RmgrData RmgrTable[]; + +#endif /* XLOG_INTERNAL_H */ diff --git a/src/include/access/xlogdefs.h b/src/include/access/xlogdefs.h index 057236e4ed..4507723e5a 100644 --- a/src/include/access/xlogdefs.h +++ b/src/include/access/xlogdefs.h @@ -2,12 +2,12 @@ * xlogdefs.h * * Postgres transaction log manager record pointer and - * system startup number definitions + * timeline number definitions * * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/xlogdefs.h,v 1.11 2003/12/20 17:31:21 momjian Exp $ + * $PostgreSQL: pgsql/src/include/access/xlogdefs.h,v 1.12 2004/07/21 22:31:25 tgl Exp $ */ #ifndef XLOG_DEFS_H #define XLOG_DEFS_H @@ -33,12 +33,6 @@ typedef struct XLogRecPtr uint32 xrecoff; /* byte offset of location in log file */ } XLogRecPtr; -typedef struct XLogwrtResult -{ - XLogRecPtr Write; /* last byte + 1 written out */ - XLogRecPtr Flush; /* last byte + 1 flushed */ -} XLogwrtResult; - /* * Macros for comparing XLogRecPtrs @@ -57,10 +51,16 @@ typedef struct XLogwrtResult #define XLByteEQ(a, b) \ ((a).xlogid == (b).xlogid && (a).xrecoff == (b).xrecoff) + /* - * StartUpID (SUI) - system startups counter. It's to allow removing - * pg_clog after shutdown, in future. + * TimeLineID (TLI) - identifies different database histories to prevent + * confusion after restoring a prior state of a database installation. + * TLI does not change in a normal stop/restart of the database (including + * crash-and-recover cases); but we must assign a new TLI after doing + * a recovery to a prior state, a/k/a point-in-time recovery. This makes + * the new WAL logfile sequence we generate distinguishable from the + * sequence that was generated in the previous incarnation. */ -typedef uint32 StartUpID; +typedef uint32 TimeLineID; #endif /* XLOG_DEFS_H */ diff --git a/src/include/access/xlogutils.h b/src/include/access/xlogutils.h index 8b1dc671fa..a5b8f30978 100644 --- a/src/include/access/xlogutils.h +++ b/src/include/access/xlogutils.h @@ -6,19 +6,15 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/xlogutils.h,v 1.14 2004/02/11 22:55:25 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/xlogutils.h,v 1.15 2004/07/21 22:31:25 tgl Exp $ */ #ifndef XLOG_UTILS_H #define XLOG_UTILS_H #include "access/rmgr.h" #include "storage/buf.h" -#include "storage/itemptr.h" #include "utils/rel.h" -extern int XLogIsOwnerOfTuple(RelFileNode hnode, ItemPointer iptr, - TransactionId xid, CommandId cid); -extern bool XLogIsValidTuple(RelFileNode hnode, ItemPointer iptr); extern void XLogInitRelationCache(void); extern void XLogCloseRelationCache(void); diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index 2319da9b8e..2f5d51ff98 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.15 2004/06/03 02:08:05 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.16 2004/07/21 22:31:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,7 +22,7 @@ /* Version identifier for this pg_control format */ -#define PG_CONTROL_VERSION 73 +#define PG_CONTROL_VERSION 74 /* * Body of CheckPoint XLOG records. This is declared here because we keep @@ -30,13 +30,13 @@ */ typedef struct CheckPoint { - XLogRecPtr redo; /* next RecPtr available when we */ - /* began to create CheckPoint */ - /* (i.e. REDO start point) */ - XLogRecPtr undo; /* first record of oldest in-progress */ - /* transaction when we started */ - /* (i.e. UNDO end point) */ - StartUpID ThisStartUpID; /* current SUI */ + XLogRecPtr redo; /* next RecPtr available when we + * began to create CheckPoint + * (i.e. REDO start point) */ + XLogRecPtr undo; /* first record of oldest in-progress + * transaction when we started + * (i.e. UNDO end point) */ + TimeLineID ThisTimeLineID; /* current TLI */ TransactionId nextXid; /* next free XID */ Oid nextOid; /* next free OID */ time_t time; /* time stamp of checkpoint */ @@ -46,8 +46,6 @@ typedef struct CheckPoint #define XLOG_CHECKPOINT_SHUTDOWN 0x00 #define XLOG_CHECKPOINT_ONLINE 0x10 #define XLOG_NEXTOID 0x30 -#define XLOG_FILE_HEADER 0x40 -#define XLOG_WASTED_SPACE 0x50 /* System status indicator */ diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h index 727ec508a3..21148c9937 100644 --- a/src/include/storage/bufpage.h +++ b/src/include/storage/bufpage.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/bufpage.h,v 1.59 2004/07/01 00:51:43 tgl Exp $ + * $PostgreSQL: pgsql/src/include/storage/bufpage.h,v 1.60 2004/07/21 22:31:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -87,13 +87,22 @@ typedef uint16 LocationIndex; /* * disk page organization + * * space management information generic to any page * + * pd_lsn - identifies xlog record for last change to this page. + * pd_tli - ditto. * pd_lower - offset to start of free space. * pd_upper - offset to end of free space. * pd_special - offset to start of special space. * pd_pagesize_version - size in bytes and page layout version number. * + * The LSN is used by the buffer manager to enforce the basic rule of WAL: + * "thou shalt write xlog before data". A dirty buffer cannot be dumped + * to disk until xlog has been flushed at least as far as the page's LSN. + * We also store the TLI for identification purposes (it is not clear that + * this is actually necessary, but it seems like a good idea). + * * The page version number and page size are packed together into a single * uint16 field. This is for historical reasons: before PostgreSQL 7.3, * there was no concept of a page version number, and doing it this way @@ -109,13 +118,10 @@ typedef uint16 LocationIndex; */ typedef struct PageHeaderData { - /* XXX LSN is member of *any* block, not */ - /* only page-organized - 'll change later */ - XLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog */ - /* record for last change of this page */ - StartUpID pd_sui; /* SUI of last changes (currently it's */ - /* used by heap AM only) */ - + /* XXX LSN is member of *any* block, not only page-organized ones */ + XLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog + * record for last change to this page */ + TimeLineID pd_tli; /* TLI of last change */ LocationIndex pd_lower; /* offset to start of free space */ LocationIndex pd_upper; /* offset to end of free space */ LocationIndex pd_special; /* offset to start of special space */ @@ -298,10 +304,10 @@ typedef PageHeaderData *PageHeader; #define PageSetLSN(page, lsn) \ (((PageHeader) (page))->pd_lsn = (lsn)) -#define PageGetSUI(page) \ - (((PageHeader) (page))->pd_sui) -#define PageSetSUI(page, sui) \ - (((PageHeader) (page))->pd_sui = (StartUpID) (sui)) +#define PageGetTLI(page) \ + (((PageHeader) (page))->pd_tli) +#define PageSetTLI(page, tli) \ + (((PageHeader) (page))->pd_tli = (tli)) /* ---------------------------------------------------------------- * extern declarations -- 2.40.0