From 35af5422f640e74029a167d106604da35ae64c5f Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 21 Aug 2006 16:16:31 +0000 Subject: [PATCH] Make the server track an 'XID epoch', that is, maintain higher-order bits of the transaction ID counter. Nothing is done with the epoch except to store it in checkpoint records, but this provides a foundation with which add-on code can pretend that XIDs never wrap around. This is a severely trimmed and rewritten version of the xxid patch submitted by Marko Kreen. Per discussion, the epoch counter seems the only part of xxid that really needs to be in the core server. --- doc/src/sgml/ref/pg_resetxlog.sgml | 28 ++++++-- src/backend/access/transam/xlog.c | 94 ++++++++++++++++++++++--- src/bin/pg_controldata/pg_controldata.c | 5 +- src/bin/pg_resetxlog/pg_resetxlog.c | 86 ++++++++++++++++------ src/include/access/xlog.h | 3 +- src/include/catalog/pg_control.h | 5 +- 6 files changed, 177 insertions(+), 44 deletions(-) diff --git a/doc/src/sgml/ref/pg_resetxlog.sgml b/doc/src/sgml/ref/pg_resetxlog.sgml index acdf0c7aed..2450462169 100644 --- a/doc/src/sgml/ref/pg_resetxlog.sgml +++ b/doc/src/sgml/ref/pg_resetxlog.sgml @@ -1,5 +1,5 @@ @@ -22,6 +22,7 @@ PostgreSQL documentation -n -ooid -x xid + -e xid_epoch -m mxid -O mxoff -l timelineid,fileid,seg @@ -61,9 +62,9 @@ PostgreSQL documentation by specifying the -f (force) switch. In this case plausible values will be substituted for the missing data. Most of the fields can be expected to match, but manual assistance may be needed for the next OID, - next transaction ID, next multitransaction ID and offset, + next transaction ID and epoch, next multitransaction ID and offset, WAL starting address, and database locale fields. - The first five of these can be set using the switches discussed below. + The first six of these can be set using the switches discussed below. pg_resetxlog's own environment is the source for its guess at the locale fields; take care that LANG and so forth match the environment that initdb was run in. @@ -76,11 +77,12 @@ PostgreSQL documentation - The -o, -x, -m, -O, + The -o, -x, -e, + -m, -O, and -l - switches allow the next OID, next transaction ID, next multitransaction - ID, next multitransaction offset, and WAL starting address values to - be set manually. These are only needed when + switches allow the next OID, next transaction ID, next transaction ID's + epoch, next multitransaction ID, next multitransaction offset, and WAL + starting address values to be set manually. These are only needed when pg_resetxlog is unable to determine appropriate values by reading pg_control. Safe values may be determined as follows: @@ -146,6 +148,18 @@ PostgreSQL documentation get the next-OID setting right. + + + + The transaction ID epoch is not actually stored anywhere in the database + except in the field that is set by pg_resetxlog, + so any value will work so far as the database itself is concerned. + You might need to adjust this value to ensure that replication + systems such as Slony-I work correctly — + if so, an appropriate value should be obtainable from the state of + the downstream replicated database. + + diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index d78f1c3074..16fb6b5e5e 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.248 2006/08/17 23:04:05 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.249 2006/08/21 16:16:31 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -312,10 +312,8 @@ static XLogRecPtr RedoRecPtr; * new log file. * * CheckpointLock: must be held to do a checkpoint (ensures only one - * checkpointer at a time; even though the postmaster won't launch - * parallel checkpoint processes, we need this because manual checkpoints - * could be launched simultaneously). XXX now that all checkpoints are - * done by the bgwriter, isn't this lock redundant? + * checkpointer at a time; currently, with all checkpoints done by the + * bgwriter, this is just pro forma). * *---------- */ @@ -363,9 +361,13 @@ typedef struct XLogCtlData { /* Protected by WALInsertLock: */ XLogCtlInsert Insert; + /* Protected by info_lck: */ XLogwrtRqst LogwrtRqst; XLogwrtResult LogwrtResult; + uint32 ckptXidEpoch; /* nextXID & epoch of latest checkpoint */ + TransactionId ckptXid; + /* Protected by WALWriteLock: */ XLogCtlWrite Write; @@ -380,7 +382,7 @@ typedef struct XLogCtlData int XLogCacheBlck; /* highest allocated xlog buffer index */ TimeLineID ThisTimeLineID; - slock_t info_lck; /* locks shared LogwrtRqst/LogwrtResult */ + slock_t info_lck; /* locks shared variables shown above */ } XLogCtlData; static XLogCtlData *XLogCtl = NULL; @@ -4086,6 +4088,7 @@ BootStrapXLOG(void) checkPoint.redo.xrecoff = SizeOfXLogLongPHD; checkPoint.undo = checkPoint.redo; checkPoint.ThisTimeLineID = ThisTimeLineID; + checkPoint.nextXidEpoch = 0; checkPoint.nextXid = FirstNormalTransactionId; checkPoint.nextOid = FirstBootstrapObjectId; checkPoint.nextMulti = FirstMultiXactId; @@ -4752,8 +4755,9 @@ StartupXLOG(void) checkPoint.undo.xlogid, checkPoint.undo.xrecoff, wasShutdown ? "TRUE" : "FALSE"))); ereport(LOG, - (errmsg("next transaction ID: %u; next OID: %u", - checkPoint.nextXid, checkPoint.nextOid))); + (errmsg("next transaction ID: %u/%u; next OID: %u", + checkPoint.nextXidEpoch, checkPoint.nextXid, + checkPoint.nextOid))); ereport(LOG, (errmsg("next MultiXactId: %u; next MultiXactOffset: %u", checkPoint.nextMulti, checkPoint.nextMultiOffset))); @@ -5135,6 +5139,10 @@ StartupXLOG(void) /* start the archive_timeout timer running */ XLogCtl->Write.lastSegSwitchTime = ControlFile->time; + /* initialize shared-memory copy of latest checkpoint XID/epoch */ + XLogCtl->ckptXidEpoch = ControlFile->checkPointCopy.nextXidEpoch; + XLogCtl->ckptXid = ControlFile->checkPointCopy.nextXid; + /* Start up the commit log and related stuff, too */ StartupCLOG(); StartupSUBTRANS(oldestActiveXID); @@ -5364,6 +5372,46 @@ GetRecentNextXid(void) return ControlFile->checkPointCopy.nextXid; } +/* + * GetNextXidAndEpoch - get the current nextXid value and associated epoch + * + * This is exported for use by code that would like to have 64-bit XIDs. + * We don't really support such things, but all XIDs within the system + * can be presumed "close to" the result, and thus the epoch associated + * with them can be determined. + */ +void +GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch) +{ + uint32 ckptXidEpoch; + TransactionId ckptXid; + TransactionId nextXid; + + /* Must read checkpoint info first, else have race condition */ + { + /* use volatile pointer to prevent code rearrangement */ + volatile XLogCtlData *xlogctl = XLogCtl; + + SpinLockAcquire(&xlogctl->info_lck); + ckptXidEpoch = xlogctl->ckptXidEpoch; + ckptXid = xlogctl->ckptXid; + SpinLockRelease(&xlogctl->info_lck); + } + + /* Now fetch current nextXid */ + nextXid = ReadNewTransactionId(); + + /* + * nextXid is certainly logically later than ckptXid. So if it's + * numerically less, it must have wrapped into the next epoch. + */ + if (nextXid < ckptXid) + ckptXidEpoch++; + + *xid = nextXid; + *epoch = ckptXidEpoch; +} + /* * This must be called ONCE during postmaster or standalone-backend shutdown */ @@ -5531,6 +5579,11 @@ CreateCheckPoint(bool shutdown, bool force) checkPoint.nextXid = ShmemVariableCache->nextXid; LWLockRelease(XidGenLock); + /* Increase XID epoch if we've wrapped around since last checkpoint */ + checkPoint.nextXidEpoch = ControlFile->checkPointCopy.nextXidEpoch; + if (checkPoint.nextXid < ControlFile->checkPointCopy.nextXid) + checkPoint.nextXidEpoch++; + LWLockAcquire(OidGenLock, LW_SHARED); checkPoint.nextOid = ShmemVariableCache->nextOid; if (!shutdown) @@ -5600,6 +5653,17 @@ CreateCheckPoint(bool shutdown, bool force) UpdateControlFile(); LWLockRelease(ControlFileLock); + /* Update shared-memory copy of checkpoint XID/epoch */ + { + /* use volatile pointer to prevent code rearrangement */ + volatile XLogCtlData *xlogctl = XLogCtl; + + SpinLockAcquire(&xlogctl->info_lck); + xlogctl->ckptXidEpoch = checkPoint.nextXidEpoch; + xlogctl->ckptXid = checkPoint.nextXid; + SpinLockRelease(&xlogctl->info_lck); + } + /* * We are now done with critical updates; no need for system panic if we * have trouble while fooling with offline log segments. @@ -5803,6 +5867,10 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset); + /* ControlFile->checkPointCopy always tracks the latest ckpt XID */ + ControlFile->checkPointCopy.nextXidEpoch = checkPoint.nextXidEpoch; + ControlFile->checkPointCopy.nextXid = checkPoint.nextXid; + /* * TLI may change in a shutdown checkpoint, but it shouldn't decrease */ @@ -5836,6 +5904,11 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) } MultiXactAdvanceNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset); + + /* ControlFile->checkPointCopy always tracks the latest ckpt XID */ + ControlFile->checkPointCopy.nextXidEpoch = checkPoint.nextXidEpoch; + ControlFile->checkPointCopy.nextXid = checkPoint.nextXid; + /* TLI should not change in an on-line checkpoint */ if (checkPoint.ThisTimeLineID != ThisTimeLineID) ereport(PANIC, @@ -5861,10 +5934,11 @@ xlog_desc(StringInfo buf, uint8 xl_info, char *rec) CheckPoint *checkpoint = (CheckPoint *) rec; appendStringInfo(buf, "checkpoint: redo %X/%X; undo %X/%X; " - "tli %u; xid %u; oid %u; multi %u; offset %u; %s", + "tli %u; xid %u/%u; oid %u; multi %u; offset %u; %s", checkpoint->redo.xlogid, checkpoint->redo.xrecoff, checkpoint->undo.xlogid, checkpoint->undo.xrecoff, - checkpoint->ThisTimeLineID, checkpoint->nextXid, + checkpoint->ThisTimeLineID, + checkpoint->nextXidEpoch, checkpoint->nextXid, checkpoint->nextOid, checkpoint->nextMulti, checkpoint->nextMultiOffset, diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index e0f3cfd2d9..cbde5357ed 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ b/src/bin/pg_controldata/pg_controldata.c @@ -6,7 +6,7 @@ * copyright (c) Oliver Elphick , 2001; * licence: BSD * - * $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.30 2006/08/07 16:57:56 tgl Exp $ + * $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.31 2006/08/21 16:16:31 tgl Exp $ */ #include "postgres.h" @@ -177,7 +177,8 @@ main(int argc, char *argv[]) ControlFile.checkPointCopy.undo.xrecoff); printf(_("Latest checkpoint's TimeLineID: %u\n"), ControlFile.checkPointCopy.ThisTimeLineID); - printf(_("Latest checkpoint's NextXID: %u\n"), + printf(_("Latest checkpoint's NextXID: %u/%u\n"), + ControlFile.checkPointCopy.nextXidEpoch, ControlFile.checkPointCopy.nextXid); printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile.checkPointCopy.nextOid); diff --git a/src/bin/pg_resetxlog/pg_resetxlog.c b/src/bin/pg_resetxlog/pg_resetxlog.c index c7589e92c1..a8308388e8 100644 --- a/src/bin/pg_resetxlog/pg_resetxlog.c +++ b/src/bin/pg_resetxlog/pg_resetxlog.c @@ -23,7 +23,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.51 2006/08/07 16:57:56 tgl Exp $ + * $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.52 2006/08/21 16:16:31 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -71,6 +71,7 @@ main(int argc, char *argv[]) int c; bool force = false; bool noupdate = false; + uint32 set_xid_epoch = -1; TransactionId set_xid = 0; Oid set_oid = 0; MultiXactId set_mxid = 0; @@ -104,7 +105,7 @@ main(int argc, char *argv[]) } - while ((c = getopt(argc, argv, "fl:m:no:O:x:")) != -1) + while ((c = getopt(argc, argv, "fl:m:no:O:x:e:")) != -1) { switch (c) { @@ -116,6 +117,21 @@ main(int argc, char *argv[]) noupdate = true; break; + case 'e': + set_xid_epoch = strtoul(optarg, &endptr, 0); + if (endptr == optarg || *endptr != '\0') + { + fprintf(stderr, _("%s: invalid argument for option -e\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + exit(1); + } + if (set_xid_epoch == -1) + { + fprintf(stderr, _("%s: transaction ID epoch (-e) must not be -1\n"), progname); + exit(1); + } + break; + case 'x': set_xid = strtoul(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0') @@ -271,6 +287,9 @@ main(int argc, char *argv[]) * Adjust fields if required by switches. (Do this now so that printout, * if any, includes these values.) */ + if (set_xid_epoch != -1) + ControlFile.checkPointCopy.nextXidEpoch = set_xid_epoch; + if (set_xid != 0) ControlFile.checkPointCopy.nextXid = set_xid; @@ -441,6 +460,7 @@ GuessControlValues(void) ControlFile.checkPointCopy.redo.xrecoff = SizeOfXLogLongPHD; ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo; ControlFile.checkPointCopy.ThisTimeLineID = 1; + ControlFile.checkPointCopy.nextXidEpoch = 0; ControlFile.checkPointCopy.nextXid = (TransactionId) 514; /* XXX */ ControlFile.checkPointCopy.nextOid = FirstBootstrapObjectId; ControlFile.checkPointCopy.nextMulti = FirstMultiXactId; @@ -513,29 +533,50 @@ PrintControlValues(bool guessed) snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT, ControlFile.system_identifier); - printf(_("pg_control version number: %u\n"), ControlFile.pg_control_version); - printf(_("Catalog version number: %u\n"), ControlFile.catalog_version_no); - printf(_("Database system identifier: %s\n"), sysident_str); - printf(_("Current log file ID: %u\n"), ControlFile.logId); - printf(_("Next log file segment: %u\n"), ControlFile.logSeg); - printf(_("Latest checkpoint's TimeLineID: %u\n"), ControlFile.checkPointCopy.ThisTimeLineID); - printf(_("Latest checkpoint's NextXID: %u\n"), ControlFile.checkPointCopy.nextXid); - printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile.checkPointCopy.nextOid); - printf(_("Latest checkpoint's NextMultiXactId: %u\n"), ControlFile.checkPointCopy.nextMulti); - printf(_("Latest checkpoint's NextMultiOffset: %u\n"), ControlFile.checkPointCopy.nextMultiOffset); - printf(_("Maximum data alignment: %u\n"), ControlFile.maxAlign); + printf(_("pg_control version number: %u\n"), + ControlFile.pg_control_version); + printf(_("Catalog version number: %u\n"), + ControlFile.catalog_version_no); + printf(_("Database system identifier: %s\n"), + sysident_str); + printf(_("Current log file ID: %u\n"), + ControlFile.logId); + printf(_("Next log file segment: %u\n"), + ControlFile.logSeg); + printf(_("Latest checkpoint's TimeLineID: %u\n"), + ControlFile.checkPointCopy.ThisTimeLineID); + printf(_("Latest checkpoint's NextXID: %u/%u\n"), + ControlFile.checkPointCopy.nextXidEpoch, + ControlFile.checkPointCopy.nextXid); + printf(_("Latest checkpoint's NextOID: %u\n"), + ControlFile.checkPointCopy.nextOid); + printf(_("Latest checkpoint's NextMultiXactId: %u\n"), + ControlFile.checkPointCopy.nextMulti); + printf(_("Latest checkpoint's NextMultiOffset: %u\n"), + ControlFile.checkPointCopy.nextMultiOffset); + printf(_("Maximum data alignment: %u\n"), + ControlFile.maxAlign); /* we don't print floatFormat since can't say much useful about it */ - printf(_("Database block size: %u\n"), ControlFile.blcksz); - printf(_("Blocks per segment of large relation: %u\n"), ControlFile.relseg_size); - printf(_("WAL block size: %u\n"), ControlFile.xlog_blcksz); - printf(_("Bytes per WAL segment: %u\n"), ControlFile.xlog_seg_size); - printf(_("Maximum length of identifiers: %u\n"), ControlFile.nameDataLen); - printf(_("Maximum columns in an index: %u\n"), ControlFile.indexMaxKeys); + printf(_("Database block size: %u\n"), + ControlFile.blcksz); + printf(_("Blocks per segment of large relation: %u\n"), + ControlFile.relseg_size); + printf(_("WAL block size: %u\n"), + ControlFile.xlog_blcksz); + printf(_("Bytes per WAL segment: %u\n"), + ControlFile.xlog_seg_size); + printf(_("Maximum length of identifiers: %u\n"), + ControlFile.nameDataLen); + printf(_("Maximum columns in an index: %u\n"), + ControlFile.indexMaxKeys); printf(_("Date/time type storage: %s\n"), (ControlFile.enableIntTimes ? _("64-bit integers") : _("floating-point numbers"))); - printf(_("Maximum length of locale name: %u\n"), ControlFile.localeBuflen); - printf(_("LC_COLLATE: %s\n"), ControlFile.lc_collate); - printf(_("LC_CTYPE: %s\n"), ControlFile.lc_ctype); + printf(_("Maximum length of locale name: %u\n"), + ControlFile.localeBuflen); + printf(_("LC_COLLATE: %s\n"), + ControlFile.lc_collate); + printf(_("LC_CTYPE: %s\n"), + ControlFile.lc_ctype); } @@ -810,6 +851,7 @@ usage(void) printf(_(" -o OID set next OID\n")); printf(_(" -O OFFSET set next multitransaction offset\n")); printf(_(" -x XID set next transaction ID\n")); + printf(_(" -e XIDEPOCH set next transaction ID epoch\n")); printf(_(" --help show this help, then exit\n")); printf(_(" --version output version information, then exit\n")); printf(_("\nReport bugs to .\n")); diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 22b0f0bb7b..a5ae94b91a 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.73 2006/08/17 23:04:08 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.74 2006/08/21 16:16:31 tgl Exp $ */ #ifndef XLOG_H #define XLOG_H @@ -166,5 +166,6 @@ extern void CreateCheckPoint(bool shutdown, bool force); extern void XLogPutNextOid(Oid nextOid); extern XLogRecPtr GetRedoRecPtr(void); extern TransactionId GetRecentNextXid(void); +extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch); #endif /* XLOG_H */ diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index 8e405f607e..2b109f2d5a 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.31 2006/08/07 16:57:57 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.32 2006/08/21 16:16:31 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,7 +22,7 @@ /* Version identifier for this pg_control format */ -#define PG_CONTROL_VERSION 821 +#define PG_CONTROL_VERSION 822 /* * Body of CheckPoint XLOG records. This is declared here because we keep @@ -36,6 +36,7 @@ typedef struct CheckPoint * transaction when we started (i.e. UNDO end * point) */ TimeLineID ThisTimeLineID; /* current TLI */ + uint32 nextXidEpoch; /* higher-order bits of nextXid */ TransactionId nextXid; /* next free XID */ Oid nextOid; /* next free OID */ MultiXactId nextMulti; /* next free MultiXactId */ -- 2.40.0