From: Tom Lane Date: Mon, 19 Jul 2004 02:47:16 +0000 (+0000) Subject: XLOG file archiving and point-in-time recovery. There are still some X-Git-Tag: REL8_0_0BETA1~183 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=66ec2db72840e4e400d879578744420c969ed7bf;p=postgresql XLOG file archiving and point-in-time recovery. There are still some loose ends and a glaring lack of documentation, but it basically works. Simon Riggs with some editorialization by Tom Lane. --- diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index bdd063daa3..d2bafb3957 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -56,7 +56,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.83 2004/07/11 18:01:45 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.84 2004/07/19 02:47:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -64,6 +64,7 @@ #include "postgres.h" #include "access/nbtree.h" +#include "access/xlog.h" #include "miscadmin.h" #include "storage/smgr.h" #include "utils/tuplesort.h" @@ -222,15 +223,9 @@ _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2) /* * We need to log index creation in WAL iff WAL archiving is enabled * AND it's not a temp index. - * - * XXX when WAL archiving is actually supported, this test will likely - * need to change; and the hardwired extern is cruddy anyway ... 
*/ - { - extern char XLOG_archive_dir[]; + wstate.btws_use_wal = XLogArchivingActive() && !wstate.index->rd_istemp; - wstate.btws_use_wal = XLOG_archive_dir[0] && !wstate.index->rd_istemp; - } /* reserve the metapage */ wstate.btws_pages_alloced = BTREE_METAPAGE + 1; wstate.btws_pages_written = 0; diff --git a/src/backend/access/transam/recovery.conf.sample b/src/backend/access/transam/recovery.conf.sample new file mode 100644 index 0000000000..ad1bd5879b --- /dev/null +++ b/src/backend/access/transam/recovery.conf.sample @@ -0,0 +1,70 @@ +# ------------------------------- +# PostgreSQL recovery config file +# ------------------------------- +# +# Edit this file to provide the parameters that PostgreSQL +# needs to perform an archive recovery of a database +# +# If "recovery.conf" is present in the PostgreSQL data directory, it is +# read on postmaster startup. After successful recovery, it is renamed +# to "recovery.done" to ensure that we do not accidentally re-enter archive +# recovery mode. +# +# This file consists of lines of the form: +# +# name = 'value' +# +# (The quotes around the value are NOT optional, but the "=" is.) +# +# Comments are introduced with '#'. +# +# The complete list of option names and +# allowed values can be found in the PostgreSQL documentation. The +# commented-out settings shown below are sample values. +# +#--------------------------------------------------------------------------- +# REQUIRED PARAMETERS +#--------------------------------------------------------------------------- +# +# restore command +# +# specifies the shell command that is executed to copy log files +# back from archival storage. The command string may contain %f, +# which is replaced by the name of the desired log file, and %p, +# which is replaced by the absolute path to copy the log file to. +# +# It is important that the command return nonzero exit status on failure. 
+# The command *will* be asked for log files that are not present in the +# archive; it must return nonzero when so asked. +# +# NOTE that the basename of %p will be different from %f; do not +# expect them to be interchangeable. +# +# +#restore_command = 'cp /mnt/server/archivedir/%f %p' +# +# +#--------------------------------------------------------------------------- +# OPTIONAL PARAMETERS +#--------------------------------------------------------------------------- +# +# By default, recovery will rollforward to the end of the WAL log. +# If you want to stop rollforward before that point, you +# MUST set a recovery target. +# +# You may set a recovery target either by transactionId, or +# by timestamp. Recovery may either include or exclude the +# records with the recovery target value (ie, stop either just +# after or just before the given target). +# +#recovery_target_time = '2004-07-14 22:39:00' +# +# note: target time is interpreted by strptime() and must therefore be +# given in your system's default timezone. 
+# +#recovery_target_xid = '11000' +# +# true or false +#recovery_target_inclusive = 'true' +# +#--------------------------------------------------------------------------- diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index a6f53ba79f..0466fbfa53 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,13 +7,14 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.147 2004/07/01 00:49:50 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.148 2004/07/19 02:47:05 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" +#include #include #include #include @@ -33,6 +34,7 @@ #include "storage/bufpage.h" #include "storage/fd.h" #include "storage/lwlock.h" +#include "storage/pmsignal.h" #include "storage/proc.h" #include "storage/sinval.h" #include "storage/spin.h" @@ -87,10 +89,9 @@ /* User-settable parameters */ int CheckPointSegments = 3; int XLOGbuffers = 8; +char *XLogArchiveCommand = NULL; char *XLOG_sync_method = NULL; const char XLOG_sync_method_default[] = DEFAULT_SYNC_METHOD_STR; -char XLOG_archive_dir[MAXPGPATH]; /* null string means - * delete 'em */ #ifdef WAL_DEBUG bool XLOG_DEBUG = false; @@ -125,8 +126,19 @@ static int open_sync_bit = DEFAULT_SYNC_FLAGBIT; */ StartUpID ThisStartUpID = 0; -/* Are we doing recovery by reading XLOG? */ +/* Are we doing recovery from XLOG? */ bool InRecovery = false; +/* Are we recovering using offline XLOG archives? */ +static bool InArchiveRecovery = false; +/* Was the last file restored from archive, or local? 
*/ +static bool restoredFromArchive = false; + +static char recoveryRestoreCommand[MAXPGPATH]; +static bool recoveryTarget = false; +static bool recoveryTargetExact = false; +static bool recoveryTargetInclusive = true; +static TransactionId recoveryTargetXid; +static time_t recoveryTargetTime; /* * MyLastRecPtr points to the start of the last XLOG record inserted by the @@ -369,10 +381,6 @@ static ControlFileData *ControlFile = NULL; ((xlrp).xrecoff - 1) / XLogSegSize == (logSeg)) -#define XLogFileName(path, log, seg) \ - snprintf(path, MAXPGPATH, "%s/%08X%08X", \ - XLogDir, log, seg) - #define PrevBufIdx(idx) \ (((idx) == 0) ? XLogCtl->XLogCacheBlck : ((idx) - 1)) @@ -383,6 +391,21 @@ static ControlFileData *ControlFile = NULL; ((xrecoff) % BLCKSZ >= SizeOfXLogPHD && \ (BLCKSZ - (xrecoff) % BLCKSZ) >= SizeOfXLogRecord) +/* + * These macros encapsulate knowledge about the exact layout of XLog file + * names as well as archive-status file names. + */ +#define MAXFNAMELEN 32 + +#define XLogFileName(fname, log, seg) \ + snprintf(fname, MAXFNAMELEN, "%08X%08X", log, seg) + +#define XLogFilePath(path, log, seg) \ + snprintf(path, MAXPGPATH, "%s/%08X%08X", XLogDir, log, seg) + +#define StatusFilePath(path, xlog, suffix) \ + snprintf(path, MAXPGPATH, "%s/archive_status/%s%s", XLogDir, xlog, suffix) + /* * _INTL_MAXLOGRECSZ: max space needed for a record including header and * any backup-block data. 
@@ -434,6 +457,14 @@ static StartUpID lastReadSUI; static bool InRedo = false; +static void XLogArchiveNotify(const char *xlog); +static void XLogArchiveNotifySeg(uint32 log, uint32 seg); +static bool XLogArchiveIsDone(const char *xlog); +static void XLogArchiveCleanup(const char *xlog); +static void readRecoveryCommandFile(void); +static void exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, + uint32 xrecoff); +static bool recoveryStopsHere(XLogRecord *record, bool *includeThis); static bool AdvanceXLInsertBuffer(void); static bool WasteXLInsertBuffer(void); @@ -444,6 +475,7 @@ static bool InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath, bool find_free, int max_advance, bool use_lock); static int XLogFileOpen(uint32 log, uint32 seg, bool econt); +static void RestoreArchivedXLog(char *path, uint32 log, uint32 seg); static void PreallocXlogFiles(XLogRecPtr endptr); static void MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr); static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer); @@ -911,6 +943,110 @@ begin:; return (RecPtr); } +/* + * XLogArchiveNotify + * + * Create an archive notification file + * + * The name of the notification file is the message that will be picked up + * by the archiver, e.g. 
we write 00000001000000C6.ready + * and the archiver then knows to archive XLogDir/00000001000000C6, + * then when complete, rename it to 00000001000000C6.done + */ +static void +XLogArchiveNotify(const char *xlog) +{ + char archiveStatusPath[MAXPGPATH]; + FILE *fd; + + /* insert an otherwise empty file called .ready */ + StatusFilePath(archiveStatusPath, xlog, ".ready"); + fd = AllocateFile(archiveStatusPath, "w"); + if (fd == NULL) { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not create archive status file \"%s\": %m", + archiveStatusPath))); + return; + } + if (FreeFile(fd)) { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not write archive status file \"%s\": %m", + archiveStatusPath))); + return; + } + + /* Notify archiver that it's got something to do */ + if (IsUnderPostmaster) + SendPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER); +} + +/* + * Convenience routine to notify using log/seg representation of filename + */ +static void +XLogArchiveNotifySeg(uint32 log, uint32 seg) +{ + char xlog[MAXFNAMELEN]; + + XLogFileName(xlog, log, seg); + XLogArchiveNotify(xlog); +} + +/* + * XLogArchiveIsDone + * + * Checks for a ".done" archive notification file. This is called when we + * are ready to delete or recycle an old XLOG segment file. If it is okay + * to delete it then return true. + * + * If .done exists, then return true; else if .ready exists, + * then return false; else create .ready and return false. The + * last case covers the possibility that the original attempt to create + * .ready failed. 
+ */ +static bool +XLogArchiveIsDone(const char *xlog) +{ + char archiveStatusPath[MAXPGPATH]; + struct stat stat_buf; + + /* First check for .done --- this is the expected case */ + StatusFilePath(archiveStatusPath, xlog, ".done"); + if (stat(archiveStatusPath, &stat_buf) == 0) + return true; + + /* check for .ready --- this means archiver is still busy with it */ + StatusFilePath(archiveStatusPath, xlog, ".ready"); + if (stat(archiveStatusPath, &stat_buf) == 0) + return false; + + /* Race condition --- maybe archiver just finished, so recheck */ + StatusFilePath(archiveStatusPath, xlog, ".done"); + if (stat(archiveStatusPath, &stat_buf) == 0) + return true; + + /* Retry creation of the .ready file */ + XLogArchiveNotify(xlog); + return false; +} + +/* + * XLogArchiveCleanup + * + * Cleanup an archive notification file for a particular xlog segment + */ +static void +XLogArchiveCleanup(const char *xlog) +{ + char archiveStatusPath[MAXPGPATH]; + + StatusFilePath(archiveStatusPath, xlog, ".done"); + unlink(archiveStatusPath); + /* should we complain about failure? */ +} + /* * Advance the Insert state to the next buffer page, writing out the next * buffer if it still contains unwritten data. @@ -1255,11 +1391,17 @@ XLogWrite(XLogwrtRqst WriteRqst) * and re-open prior segments when an fsync request comes along * later. Doing it here ensures that one and only one backend will * perform this fsync. + * + * This is also the right place to notify the Archiver that the + * segment is ready to copy to archival storage. 
*/ if (openLogOff >= XLogSegSize && !ispartialpage) { issue_xlog_fsync(); LogwrtResult.Flush = LogwrtResult.Write; /* end of current page */ + + if (XLogArchivingActive()) + XLogArchiveNotifySeg(openLogId, openLogSeg); } if (ispartialpage) @@ -1475,7 +1617,7 @@ XLogFileInit(uint32 log, uint32 seg, int fd; int nbytes; - XLogFileName(path, log, seg); + XLogFilePath(path, log, seg); /* * Try to use existent file (checkpoint maker may have created it @@ -1621,7 +1763,7 @@ InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath, char path[MAXPGPATH]; struct stat stat_buf; - XLogFileName(path, log, seg); + XLogFilePath(path, log, seg); /* * We want to be sure that only one process does this at a time. @@ -1647,7 +1789,7 @@ InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath, return false; } NextLogSeg(log, seg); - XLogFileName(path, log, seg); + XLogFilePath(path, log, seg); } } @@ -1686,7 +1828,10 @@ XLogFileOpen(uint32 log, uint32 seg, bool econt) char path[MAXPGPATH]; int fd; - XLogFileName(path, log, seg); + if (InArchiveRecovery) + RestoreArchivedXLog(path, log, seg); + else + XLogFilePath(path, log, seg); fd = BasicOpenFile(path, O_RDWR | PG_BINARY | XLOG_SYNC_BIT, S_IRUSR | S_IWUSR); @@ -1706,9 +1851,192 @@ XLogFileOpen(uint32 log, uint32 seg, bool econt) path, log, seg))); } + /* + * XXX this is a pretty horrid hack. Remove after implementing timelines. + * + * if we switched back to local xlogs after having been + * restoring from archive, we need to make sure that the + * local files don't get removed by end-of-recovery checkpoint + * in case we need to re-run the recovery + * + * we want to copy these away as soon as possible, so set + * the archive status flag to .ready for them + * in case admin isn't cautious enough to have done this anyway + * + * XXX this is completely broken, because there is no guarantee this file + * is actually complete and ready to be archived. Also, what if there's + * a .done file for them? 
+ */ + if (InArchiveRecovery && !restoredFromArchive) + XLogArchiveNotifySeg(log, seg); + return (fd); } +/* + * Get next logfile segment when using off-line archive for recovery + * + * Attempt to retrieve the specified segment from off-line archival storage. + * If successful, fill "path" with its complete path (note that this will be + * a temp file name that doesn't follow the normal naming convention). + * + * If not successful, fill "path" with the name of the normal on-line segment + * file (which may or may not actually exist, but we'll try to use it). + */ +static void +RestoreArchivedXLog(char *path, uint32 log, uint32 seg) +{ + char xlogfname[MAXFNAMELEN]; + char xlogpath[MAXPGPATH]; + char xlogRestoreCmd[MAXPGPATH]; + char *dp; + char *endp; + const char *sp; + int rc; + struct stat stat_buf; + + /* + * When doing archive recovery, we always prefer an archived log file + * even if a file of the same name exists in XLogDir. The reason is + * that the file in XLogDir could be an old, un-filled or partly-filled + * version that was copied and restored as part of backing up $PGDATA. + * + * We could try to optimize this slightly by checking the local + * copy lastchange timestamp against the archived copy, + * but we have no API to do this, nor can we guarantee that the + * lastchange timestamp was preserved correctly when we copied + * to archive. Our aim is robustness, so we elect not to do this. + * + * If we cannot obtain the log file from the archive, however, we + * will try to use the XLogDir file if it exists. This is so that + * we can make use of log segments that weren't yet transferred to + * the archive. + * + * Notice that we don't actually overwrite any files when we copy back + * from archive because the recoveryRestoreCommand may inadvertently + * restore inappropriate xlogs, or they may be corrupt, so we may + * wish to fallback to the segments remaining in current XLogDir later. 
+ * The copy-from-archive filename is always the same, ensuring that we + * don't run out of disk space on long recoveries. + */ + XLogFileName(xlogfname, log, seg); + snprintf(xlogpath, MAXPGPATH, "%s/RECOVERYXLOG", XLogDir); + + /* + * Make sure there is no existing RECOVERYXLOG file. + */ + if (stat(xlogpath, &stat_buf) != 0) + { + if (errno != ENOENT) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not stat \"%s\": %m", + xlogpath))); + } + else + { + if (unlink(xlogpath) != 0) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not remove \"%s\": %m", + xlogpath))); + } + + /* + * construct the command to be executed + */ + dp = xlogRestoreCmd; + endp = xlogRestoreCmd + MAXPGPATH - 1; + *endp = '\0'; + + for (sp = recoveryRestoreCommand; *sp; sp++) + { + if (*sp == '%') + { + switch (sp[1]) + { + case 'p': + /* %p: full path of target file */ + sp++; + StrNCpy(dp, xlogpath, endp-dp); + dp += strlen(dp); + break; + case 'f': + /* %f: filename of desired file */ + sp++; + StrNCpy(dp, xlogfname, endp-dp); + dp += strlen(dp); + break; + case '%': + /* convert %% to a single % */ + sp++; + if (dp < endp) + *dp++ = *sp; + break; + default: + /* otherwise treat the % as not special */ + if (dp < endp) + *dp++ = *sp; + break; + } + } + else + { + if (dp < endp) + *dp++ = *sp; + } + } + *dp = '\0'; + + ereport(DEBUG3, + (errmsg_internal("executing restore command \"%s\"", + xlogRestoreCmd))); + + /* + * Copy xlog from archival storage to XLogDir + */ + rc = system(xlogRestoreCmd); + if (rc == 0) + { + /* restore success ... assuming file is really there now ... 
*/ + if (stat(xlogpath, &stat_buf) == 0) { + ereport(LOG, + (errmsg("restored log file \"%s\" from archive", + xlogfname))); + strcpy(path, xlogpath); + restoredFromArchive = true; + return; + } + if (errno != ENOENT) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not stat \"%s\": %m", + xlogpath))); + } + + /* + * remember, we rollforward UNTIL the restore fails + * so failure here is just part of the process... + * that makes it difficult to determine whether the restore + * failed because there isn't an archive to restore, or + * because the administrator has specified the restore + * program incorrectly. We have to assume the former. + */ + ereport(DEBUG1, + (errmsg("could not restore \"%s\" from archive: return code %d", + xlogfname, rc))); + + /* + * if an archived file is not available, there might still be a version + * of this file in XLogDir, so return that as the filename to open. + * + * In many recovery scenarios we expect this to fail also, but + * if so that just means we've reached the end of WAL. + */ + XLogFilePath(path, log, seg); + restoredFromArchive = false; +} + /* * Preallocate log files beyond the specified log endpoint, according to * the XLOGfile user parameter. 
@@ -1745,7 +2073,7 @@ MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr) uint32 endlogSeg; DIR *xldir; struct dirent *xlde; - char lastoff[32]; + char lastoff[MAXFNAMELEN]; char path[MAXPGPATH]; XLByteToPrevSeg(endptr, endlogId, endlogSeg); @@ -1757,25 +2085,30 @@ MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr) errmsg("could not open transaction log directory \"%s\": %m", XLogDir))); - sprintf(lastoff, "%08X%08X", log, seg); + XLogFileName(lastoff, log, seg); errno = 0; while ((xlde = readdir(xldir)) != NULL) { + /* + * use the alphanumeric sorting property of the filenames to decide + * which ones are earlier than the lastoff segment + */ if (strlen(xlde->d_name) == 16 && strspn(xlde->d_name, "0123456789ABCDEF") == 16 && strcmp(xlde->d_name, lastoff) <= 0) { - snprintf(path, MAXPGPATH, "%s/%s", XLogDir, xlde->d_name); - if (XLOG_archive_dir[0]) - { - ereport(LOG, - (errmsg("archiving transaction log file \"%s\"", - xlde->d_name))); - elog(WARNING, "archiving log files is not implemented"); - } - else + bool recycle; + + if (XLogArchivingActive()) + recycle = XLogArchiveIsDone(xlde->d_name); + else + recycle = true; + + if (recycle) { + snprintf(path, MAXPGPATH, "%s/%s", XLogDir, xlde->d_name); + /* * Before deleting the file, see if it can be recycled as * a future log segment. We allow recycling segments up @@ -1794,10 +2127,12 @@ MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr) { /* No need for any more future segments... */ ereport(LOG, - (errmsg("removing transaction log file \"%s\"", - xlde->d_name))); + (errmsg("removing transaction log file \"%s\"", + xlde->d_name))); unlink(path); } + + XLogArchiveCleanup(xlde->d_name); } } errno = 0; @@ -2771,6 +3106,401 @@ str_time(time_t tnow) return buf; } +/* + * See if there is a recovery command file (recovery.conf), and if so + * read in parameters for archive recovery. 
+ * + * XXX longer term intention is to expand this to + * cater for additional parameters and controls + * possibly use a flex lexer similar to the GUC one + */ +static void +readRecoveryCommandFile(void) +{ + char recoveryCommandFile[MAXPGPATH]; + FILE *fd; + char cmdline[MAXPGPATH]; + bool syntaxError = false; + + snprintf(recoveryCommandFile, MAXPGPATH, "%s/recovery.conf", DataDir); + fd = AllocateFile(recoveryCommandFile, "r"); + if (fd == NULL) + { + if (errno == ENOENT) + return; /* not there, so no archive recovery */ + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not open recovery command file \"%s\": %m", + recoveryCommandFile))); + } + + ereport(LOG, + (errmsg("starting archive recovery"))); + + /* + * Parse the file... + */ + while (fgets(cmdline, MAXPGPATH, fd) != NULL) + { + /* skip leading whitespace and check for # comment */ + char *ptr; + char *tok1; + char *tok2; + + for (ptr = cmdline; *ptr; ptr++) + { + if (!isspace((unsigned char) *ptr)) + break; + } + if (*ptr == '\0' || *ptr == '#') + continue; + + /* identify the quoted parameter value */ + tok1 = strtok(ptr, "'"); + if (!tok1) + { + syntaxError = true; + break; + } + tok2 = strtok(NULL, "'"); + if (!tok2) + { + syntaxError = true; + break; + } + /* reparse to get just the parameter name */ + tok1 = strtok(ptr, " \t="); + if (!tok1) + { + syntaxError = true; + break; + } + + if (strcmp(tok1,"restore_command") == 0) { + StrNCpy(recoveryRestoreCommand, tok2, MAXPGPATH); + ereport(LOG, + (errmsg("restore_command = \"%s\"", + recoveryRestoreCommand))); + } + else if (strcmp(tok1,"recovery_target_xid") == 0) { + errno = 0; + recoveryTargetXid = (TransactionId) strtoul(tok2, NULL, 0); + if (errno == EINVAL || errno == ERANGE) + ereport(FATAL, + (errmsg("recovery_target_xid is not a valid number: \"%s\"", + tok2))); + ereport(LOG, + (errmsg("recovery_target_xid = %u", + recoveryTargetXid))); + recoveryTarget = true; + recoveryTargetExact = true; + } + else if 
(strcmp(tok1,"recovery_target_time") == 0) { + struct tm tm; + + /* + * if recovery_target_xid specified, then this overrides + * recovery_target_time + */ + if (recoveryTargetExact) + continue; + recoveryTarget = true; + recoveryTargetExact = false; + /* + * convert the time string given + * by the user to the time_t format. + */ + if (strptime(tok2, "%Y-%m-%d %H:%M:%S", &tm) == NULL) + ereport(FATAL, + (errmsg("invalid recovery_target_time \"%s\"", + tok2), + errhint("Correct format is YYYY-MM-DD hh:mm:ss."))); + recoveryTargetTime = mktime(&tm); + if (recoveryTargetTime == (time_t) -1) + ereport(FATAL, + (errmsg("invalid recovery_target_time \"%s\"", + tok2), + errhint("Correct format is YYYY-MM-DD hh:mm:ss."))); + ereport(LOG, + (errmsg("recovery_target_time = %s", + tok2))); + } + else if (strcmp(tok1,"recovery_target_inclusive") == 0) { + /* + * does nothing if a recovery_target is not also set + */ + if (strcmp(tok2, "true") == 0) + recoveryTargetInclusive = true; + else + { + recoveryTargetInclusive = false; + tok2 = "false"; + } + ereport(LOG, + (errmsg("recovery_target_inclusive = %s", tok2))); + } + else + ereport(FATAL, + (errmsg("unrecognized recovery parameter \"%s\"", + tok1))); + } + + FreeFile(fd); + + if (syntaxError) + ereport(FATAL, + (errmsg("syntax error in recovery command file: %s", + cmdline), + errhint("Lines should have the format parameter = 'value'."))); + + /* Check that required parameters were supplied */ + if (recoveryRestoreCommand[0] == '\0') + ereport(FATAL, + (errmsg("recovery command file \"%s\" did not specify restore_command", + recoveryCommandFile))); + + /* + * clearly indicate our state + */ + InArchiveRecovery = true; +} + +/* + * Exit archive-recovery state + */ +static void +exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, uint32 xrecoff) +{ + char recoveryPath[MAXPGPATH]; + char xlogpath[MAXPGPATH]; + char recoveryCommandFile[MAXPGPATH]; + char recoveryCommandDone[MAXPGPATH]; + + /* + * Disable fetches from 
archive, so we can use XLogFileOpen below. + */ + InArchiveRecovery = false; + + /* + * We should have the ending log segment currently open. Verify, + * and then close it (to avoid problems on Windows with trying to + * rename or delete an open file). + */ + Assert(readFile >= 0); + Assert(readId == endLogId); + Assert(readSeg == endLogSeg); + + close(readFile); + readFile = -1; + + /* + * If the segment was fetched from archival storage, we want to replace + * the existing xlog segment (if any) with the archival version. This + * is because whatever is in XLogDir is very possibly older than what + * we have from the archives, since it could have come from restoring + * a PGDATA backup. In any case, the archival version certainly is + * more descriptive of what our current database state is, because that + * is what we replayed from. + * + * XXX there ought to be a timeline increment somewhere around here. + */ + snprintf(recoveryPath, MAXPGPATH, "%s/RECOVERYXLOG", XLogDir); + XLogFilePath(xlogpath, endLogId, endLogSeg); + + if (restoredFromArchive) + { + ereport(DEBUG3, + (errmsg_internal("moving last restored xlog to \"%s\"", + xlogpath))); + unlink(xlogpath); /* might or might not exist */ + if (rename(recoveryPath, xlogpath) != 0) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not rename \"%s\" to \"%s\": %m", + recoveryPath, xlogpath))); + /* XXX might we need to fix permissions on the file? */ + } + else + { + /* + * If the latest segment is not archival, but there's still a + * RECOVERYXLOG laying about, get rid of it. + */ + unlink(recoveryPath); /* ignore any error */ + } + + /* + * If we restored to a point-in-time, then the current WAL segment + * probably contains records beyond the stop point. These represent an + * extreme hazard: if we crash in the near future, the replay apparatus + * will know no reason why it shouldn't replay them. Therefore, + * explicitly zero out all the remaining pages of the segment. 
(We need + * not worry about the partial page in which the last record ends, since + * StartupXLOG will handle zeroing that. Also, there's nothing to do + * if we are right at a segment boundary.) + * + * XXX segment files beyond the current one also represent a hazard + * for the same reason. Need to invent timelines to fix this. + */ + + /* align xrecoff to next page, then drop segment part */ + if (xrecoff % BLCKSZ != 0) + xrecoff += (BLCKSZ - xrecoff % BLCKSZ); + xrecoff %= XLogSegSize; + + if (recoveryTarget && xrecoff != 0) + { + int fd; + char zbuffer[BLCKSZ]; + + fd = XLogFileOpen(endLogId, endLogSeg, false); + MemSet(zbuffer, 0, sizeof(zbuffer)); + if (lseek(fd, (off_t) xrecoff, SEEK_SET) < 0) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not seek in file \"%s\": %m", + xlogpath))); + for (; xrecoff < XLogSegSize; xrecoff += sizeof(zbuffer)) + { + errno = 0; + if ((int) write(fd, zbuffer, sizeof(zbuffer)) != (int) sizeof(zbuffer)) + { + /* if write didn't set errno, assume problem is no disk space */ + if (errno == 0) + errno = ENOSPC; + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not write to file \"%s\": %m", xlogpath))); + } + } + if (pg_fsync(fd) != 0) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", xlogpath))); + if (close(fd)) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not close file \"%s\": %m", xlogpath))); + } + + /* + * Rename the config file out of the way, so that we don't accidentally + * re-enter archive recovery mode in a subsequent crash. 
+ */ + snprintf(recoveryCommandFile, MAXPGPATH, "%s/recovery.conf", DataDir); + snprintf(recoveryCommandDone, MAXPGPATH, "%s/recovery.done", DataDir); + unlink(recoveryCommandDone); + if (rename(recoveryCommandFile, recoveryCommandDone) != 0) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not rename \"%s\" to \"%s\": %m", + recoveryCommandFile, recoveryCommandDone))); + + ereport(LOG, + (errmsg("archive recovery complete"))); +} + +/* + * For point-in-time recovery, this function decides whether we want to + * stop applying the XLOG at or after the current record. + * + * Returns TRUE if we are stopping, FALSE otherwise. On TRUE return, + * *includeThis is set TRUE if we should apply this record before stopping. + */ +static bool +recoveryStopsHere(XLogRecord *record, bool *includeThis) +{ + bool stopsHere; + uint8 record_info; + time_t recordXtime; + + /* Do we have a PITR target at all? */ + if (!recoveryTarget) + return false; + + /* We only consider stopping at COMMIT or ABORT records */ + if (record->xl_rmid != RM_XACT_ID) + return false; + record_info = record->xl_info & ~XLR_INFO_MASK; + if (record_info == XLOG_XACT_COMMIT) + { + xl_xact_commit *recordXactCommitData; + + recordXactCommitData = (xl_xact_commit *) XLogRecGetData(record); + recordXtime = recordXactCommitData->xtime; + } + else if (record_info == XLOG_XACT_ABORT) + { + xl_xact_abort *recordXactAbortData; + + recordXactAbortData = (xl_xact_abort *) XLogRecGetData(record); + recordXtime = recordXactAbortData->xtime; + } + else + return false; + + if (recoveryTargetExact) + { + /* + * there can be only one transaction end record + * with this exact transactionid + * + * when testing for an xid, we MUST test for + * equality only, since transactions are numbered + * in the order they start, not the order they + * complete. A higher numbered xid will complete + * before you about 50% of the time... 
+ */ + stopsHere = (record->xl_xid == recoveryTargetXid); + if (stopsHere) + *includeThis = recoveryTargetInclusive; + } + else + { + /* + * there can be many transactions that + * share the same commit time, so + * we stop after the last one, if we are + * inclusive, or stop at the first one + * if we are exclusive + */ + if (recoveryTargetInclusive) + stopsHere = (recordXtime > recoveryTargetTime); + else + stopsHere = (recordXtime >= recoveryTargetTime); + if (stopsHere) + *includeThis = false; + } + + if (stopsHere) + { + if (record_info == XLOG_XACT_COMMIT) + { + if (*includeThis) + ereport(LOG, + (errmsg("recovery stopping after commit of transaction %u, time %s", + record->xl_xid, str_time(recordXtime)))); + else + ereport(LOG, + (errmsg("recovery stopping before commit of transaction %u, time %s", + record->xl_xid, str_time(recordXtime)))); + } + else + { + if (*includeThis) + ereport(LOG, + (errmsg("recovery stopping after abort of transaction %u, time %s", + record->xl_xid, str_time(recordXtime)))); + else + ereport(LOG, + (errmsg("recovery stopping before abort of transaction %u, time %s", + record->xl_xid, str_time(recordXtime)))); + } + } + + return stopsHere; +} + /* * This must be called ONCE during postmaster or standalone-backend startup */ @@ -2784,6 +3514,8 @@ StartupXLOG(void) LastRec, checkPointLoc, EndOfLog; + uint32 endLogId; + uint32 endLogSeg; XLogRecord *record; char *buffer; uint32 freespace; @@ -2833,6 +3565,12 @@ StartupXLOG(void) pg_usleep(60000000L); #endif + /* + * Check for recovery control file, and if so set up state for + * offline recovery + */ + readRecoveryCommandFile(); + /* * Get the last valid checkpoint record. If the latest one according * to pg_control is broken, try the next-to-last one. 
@@ -2944,20 +3682,19 @@ StartupXLOG(void) if (record != NULL) { + bool recoveryContinue = true; + bool recoveryApply = true; + InRedo = true; ereport(LOG, (errmsg("redo starts at %X/%X", ReadRecPtr.xlogid, ReadRecPtr.xrecoff))); + + /* + * main redo apply loop + */ do { - /* nextXid must be beyond record's xid */ - if (TransactionIdFollowsOrEquals(record->xl_xid, - ShmemVariableCache->nextXid)) - { - ShmemVariableCache->nextXid = record->xl_xid; - TransactionIdAdvance(ShmemVariableCache->nextXid); - } - #ifdef WAL_DEBUG if (XLOG_DEBUG) { @@ -2974,16 +3711,40 @@ StartupXLOG(void) } #endif + /* + * Have we reached our recovery target? + */ + if (recoveryStopsHere(record, &recoveryApply)) + { + recoveryContinue = false; + if (!recoveryApply) + break; + } + + /* nextXid must be beyond record's xid */ + if (TransactionIdFollowsOrEquals(record->xl_xid, + ShmemVariableCache->nextXid)) + { + ShmemVariableCache->nextXid = record->xl_xid; + TransactionIdAdvance(ShmemVariableCache->nextXid); + } + if (record->xl_info & XLR_BKP_BLOCK_MASK) RestoreBkpBlocks(record, EndRecPtr); RmgrTable[record->xl_rmid].rm_redo(EndRecPtr, record); + + LastRec = ReadRecPtr; + record = ReadRecord(NULL, LOG, buffer); - } while (record != NULL); + } while (record != NULL && recoveryContinue); + /* + * end of main redo apply loop + */ + ereport(LOG, (errmsg("redo done at %X/%X", ReadRecPtr.xlogid, ReadRecPtr.xrecoff))); - LastRec = ReadRecPtr; InRedo = false; } else @@ -2992,12 +3753,29 @@ StartupXLOG(void) } /* - * Init xlog buffer cache using the block containing the last valid - * record from the previous incarnation. + * Re-fetch the last valid or last applied record, so we can identify + * the exact endpoint of what we consider the valid portion of WAL. */ record = ReadRecord(&LastRec, PANIC, buffer); EndOfLog = EndRecPtr; - XLByteToPrevSeg(EndOfLog, openLogId, openLogSeg); + XLByteToPrevSeg(EndOfLog, endLogId, endLogSeg); + + /* + * We are now done reading the old WAL. 
Turn off archive fetching + * if it was active, and make a writable copy of the last WAL segment. + * (Note that we also have a copy of the last block of the old WAL in + * readBuf; we will use that below.) + */ + if (InArchiveRecovery) + exitArchiveRecovery(endLogId, endLogSeg, EndOfLog.xrecoff); + + /* + * Prepare to write WAL starting at EndOfLog position, and init xlog + * buffer cache using the block containing the last record from the + * previous incarnation. + */ + openLogId = endLogId; + openLogSeg = endLogSeg; openLogFile = XLogFileOpen(openLogId, openLogSeg, false); openLogOff = 0; ControlFile->logId = openLogId; @@ -3707,6 +4485,7 @@ xlog_desc(char *buf, uint8 xl_info, char *rec) } #ifdef WAL_DEBUG + static void xlog_outrec(char *buf, XLogRecord *record) { @@ -3731,6 +4510,7 @@ xlog_outrec(char *buf, XLogRecord *record) sprintf(buf + strlen(buf), ": %s", RmgrTable[record->xl_rmid].rm_name); } + #endif /* WAL_DEBUG */ diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index d5cdb909c3..241f2550f6 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.120 2004/07/17 17:28:29 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.121 2004/07/19 02:47:06 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -5414,15 +5414,8 @@ copy_relation_data(Relation rel, SMgrRelation dst) /* * We need to log the copied data in WAL iff WAL archiving is enabled * AND it's not a temp rel. - * - * XXX when WAL archiving is actually supported, this test will likely - * need to change; and the hardwired extern is cruddy anyway ... 
*/ - { - extern char XLOG_archive_dir[]; - - use_wal = XLOG_archive_dir[0] && !rel->rd_istemp; - } + use_wal = XLogArchivingActive() && !rel->rd_istemp; nblocks = RelationGetNumberOfBlocks(rel); for (blkno = 0; blkno < nblocks; blkno++) diff --git a/src/backend/postmaster/Makefile b/src/backend/postmaster/Makefile index 489c6d9211..10d7cfcb8e 100644 --- a/src/backend/postmaster/Makefile +++ b/src/backend/postmaster/Makefile @@ -4,7 +4,7 @@ # Makefile for src/backend/postmaster # # IDENTIFICATION -# $PostgreSQL: pgsql/src/backend/postmaster/Makefile,v 1.15 2004/05/29 22:48:19 tgl Exp $ +# $PostgreSQL: pgsql/src/backend/postmaster/Makefile,v 1.16 2004/07/19 02:47:08 tgl Exp $ # #------------------------------------------------------------------------- @@ -12,7 +12,7 @@ subdir = src/backend/postmaster top_builddir = ../../.. include $(top_builddir)/src/Makefile.global -OBJS = postmaster.o bgwriter.o pgstat.o +OBJS = postmaster.o bgwriter.o pgstat.o pgarch.o all: SUBSYS.o diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c new file mode 100644 index 0000000000..c9595094f1 --- /dev/null +++ b/src/backend/postmaster/pgarch.c @@ -0,0 +1,588 @@ +/*------------------------------------------------------------------------- + * + * pgarch.c + * + * PostgreSQL WAL archiver + * + * All functions relating to archiver are included here + * + * - All functions executed by archiver process + * + * - archiver is forked from postmaster, and the two + * processes then communicate using signals. All functions + * executed by postmaster are included in this file. 
+ * + * Initial author: Simon Riggs simon@2ndquadrant.com + * + * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/postmaster/pgarch.c,v 1.1 2004/07/19 02:47:08 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include +#include +#include +#include + +#include "postmaster/pgarch.h" +#include "libpq/pqsignal.h" +#include "miscadmin.h" +#include "postmaster/postmaster.h" +#include "storage/fd.h" +#include "storage/ipc.h" +#include "storage/pg_shmem.h" +#include "storage/pmsignal.h" +#include "utils/guc.h" +#include "utils/ps_status.h" + + +/* ---------- + * Timer definitions. + * ---------- + */ +#define PGARCH_AUTOWAKE_INTERVAL 60 /* How often to force a poll of + * the archive status directory; + * in seconds. */ +#define PGARCH_RESTART_INTERVAL 60 /* How often to attempt to restart + * a failed archiver; in seconds. */ + +/* ---------- + * Archiver control info. + * + * We expect that archivable files within pg_xlog will have names between + * MIN_XFN_CHARS and MAX_XFN_CHARS in length, consisting only of characters + * appearing in VALID_XFN_CHARS. The status files in archive_status have + * corresponding names with ".ready" or ".done" appended. + * ---------- + */ +#define MIN_XFN_CHARS 16 +#define MAX_XFN_CHARS 16 +#define VALID_XFN_CHARS "0123456789ABCDEF" + +#define NUM_ARCHIVE_RETRIES 3 + + +/* ---------- + * Local data + * ---------- + */ +static char XLogDir[MAXPGPATH]; +static char XLogArchiveStatusDir[MAXPGPATH]; +static time_t last_pgarch_start_time; + +/* + * Flags set by interrupt handlers for later service in the main loop. 
+ */ +static volatile sig_atomic_t got_SIGHUP = false; +static volatile sig_atomic_t wakened = false; + +/* ---------- + * Local function forward declarations + * ---------- + */ +#ifdef EXEC_BACKEND +static pid_t pgarch_forkexec(void); +#endif + +NON_EXEC_STATIC void PgArchiverMain(int argc, char *argv[]); +static void pgarch_exit(SIGNAL_ARGS); +static void ArchSigHupHandler(SIGNAL_ARGS); +static void pgarch_waken(SIGNAL_ARGS); +static void pgarch_MainLoop(void); +static void pgarch_ArchiverCopyLoop(void); +static bool pgarch_archiveXlog(char *xlog); +static bool pgarch_readyXlog(char *xlog); +static void pgarch_archiveDone(char *xlog); + + +/* ------------------------------------------------------------ + * Public functions called from postmaster follow + * ------------------------------------------------------------ + */ + +/* + * pgarch_start + * + * Called from postmaster at startup or after an existing archiver + * died. Attempt to fire up a fresh archiver process. + * + * Returns PID of child process, or 0 if fail. + * + * Note: if fail, we will be called again from the postmaster main loop. + */ +int +pgarch_start(void) +{ + time_t curtime; + pid_t pgArchPid; + + /* + * Do nothing if no archiver needed + */ + if (!XLogArchivingActive()) + return 0; + + /* + * Do nothing if too soon since last archiver start. This is a + * safety valve to protect against continuous respawn attempts if the + * archiver is dying immediately at launch. Note that since we will + * be re-called from the postmaster main loop, we will get another + * chance later. 
+ */ + curtime = time(NULL); + if ((unsigned int) (curtime - last_pgarch_start_time) < + (unsigned int) PGARCH_RESTART_INTERVAL) + return 0; + last_pgarch_start_time = curtime; + + fflush(stdout); + fflush(stderr); + +#ifdef __BEOS__ + /* Specific beos actions before backend startup */ + beos_before_backend_startup(); +#endif + +#ifdef EXEC_BACKEND + switch ((pgArchPid = pgarch_forkexec())) +#else + switch ((pgArchPid = fork())) +#endif + { + case -1: +#ifdef __BEOS__ + /* Specific beos actions */ + beos_backend_startup_failed(); +#endif + ereport(LOG, + (errmsg("could not fork archiver: %m"))); + return 0; + +#ifndef EXEC_BACKEND + case 0: + /* in postmaster child ... */ +#ifdef __BEOS__ + /* Specific beos actions after backend startup */ + beos_backend_startup(); +#endif + /* Close the postmaster's sockets */ + ClosePostmasterPorts(); + + /* Drop our connection to postmaster's shared memory, as well */ + PGSharedMemoryDetach(); + + PgArchiverMain(0, NULL); + break; +#endif + + default: + return (int) pgArchPid; + } + + /* shouldn't get here */ + return 0; +} + +/* ------------------------------------------------------------ + * Local functions called by archiver follow + * ------------------------------------------------------------ + */ + + +#ifdef EXEC_BACKEND + +/* + * pgarch_forkexec() - + * + * Format up the arglist for, then fork and exec, archive process + */ +static pid_t +pgarch_forkexec(void) +{ + char *av[10]; + int ac = 0; + + av[ac++] = "postgres"; + + av[ac++] = "-forkarch"; + + av[ac++] = NULL; /* filled in by postmaster_forkexec */ + + av[ac] = NULL; + Assert(ac < lengthof(av)); + + return postmaster_forkexec(ac, av); +} + +#endif /* EXEC_BACKEND */ + + +/* + * PgArchiverMain + * + * The argc/argv parameters are valid only in EXEC_BACKEND case. However, + * since we don't use 'em, it hardly matters... 
+ */ +NON_EXEC_STATIC void +PgArchiverMain(int argc, char *argv[]) +{ + IsUnderPostmaster = true; /* we are a postmaster subprocess now */ + + MyProcPid = getpid(); /* reset MyProcPid */ + + /* Lose the postmaster's on-exit routines */ + on_exit_reset(); + + /* + * Ignore all signals usually bound to some action in the postmaster, + * except for SIGHUP, SIGUSR1 and SIGQUIT. + */ + pqsignal(SIGHUP, ArchSigHupHandler); + pqsignal(SIGINT, SIG_IGN); + pqsignal(SIGTERM, SIG_IGN); + pqsignal(SIGQUIT, pgarch_exit); + pqsignal(SIGALRM, SIG_IGN); + pqsignal(SIGPIPE, SIG_IGN); + pqsignal(SIGUSR1, pgarch_waken); + pqsignal(SIGUSR2, SIG_IGN); + pqsignal(SIGCHLD, SIG_DFL); + pqsignal(SIGTTIN, SIG_DFL); + pqsignal(SIGTTOU, SIG_DFL); + pqsignal(SIGCONT, SIG_DFL); + pqsignal(SIGWINCH, SIG_DFL); + PG_SETMASK(&UnBlockSig); + + /* + * Identify myself via ps + */ + init_ps_display("archiver process", "", ""); + set_ps_display(""); + + /* Init XLOG file paths */ + snprintf(XLogDir, MAXPGPATH, "%s/pg_xlog", DataDir); + snprintf(XLogArchiveStatusDir, MAXPGPATH, "%s/archive_status", XLogDir); + + pgarch_MainLoop(); + + exit(0); +} + +/* SIGQUIT signal handler for archiver process */ +static void +pgarch_exit(SIGNAL_ARGS) +{ + /* + * For now, we just nail the doors shut and get out of town. It might + * seem cleaner to finish up any pending archive copies, but there's + * a nontrivial risk that init will kill us partway through. 
+ */ + exit(0); +} + +/* SIGHUP: set flag to re-read config file at next convenient time */ +static void +ArchSigHupHandler(SIGNAL_ARGS) +{ + got_SIGHUP = true; +} + +/* SIGUSR1 signal handler for archiver process */ +static void +pgarch_waken(SIGNAL_ARGS) +{ + wakened = true; +} + +/* + * pgarch_MainLoop + * + * Main loop for archiver + */ +static void +pgarch_MainLoop(void) +{ + time_t last_copy_time = 0; + time_t curtime; + + /* + * We run the copy loop immediately upon entry, in case there are + * unarchived files left over from a previous database run (or maybe + * the archiver died unexpectedly). After that we wait for a signal + * or timeout before doing more. + */ + wakened = true; + + do { + + /* Check for config update */ + if (got_SIGHUP) + { + got_SIGHUP = false; + ProcessConfigFile(PGC_SIGHUP); + if (!XLogArchivingActive()) + break; /* user wants us to shut down */ + } + + /* Do what we're here for */ + if (wakened) + { + wakened = false; + pgarch_ArchiverCopyLoop(); + last_copy_time = time(NULL); + } + + /* + * There shouldn't be anything for the archiver to do except + * to wait for a signal, so we could use pause(3) here... + * ...however, the archiver exists to protect our data, so + * she wakes up occasionally to allow herself to be proactive. + * In particular this avoids getting stuck if a signal arrives + * just before we enter sleep(). 
+ */ + if (!wakened) + { + sleep(PGARCH_AUTOWAKE_INTERVAL); + + curtime = time(NULL); + if ((unsigned int) (curtime - last_copy_time) >= + (unsigned int) PGARCH_AUTOWAKE_INTERVAL) + wakened = true; + } + } while (PostmasterIsAlive(true)); +} + +/* + * pgarch_ArchiverCopyLoop + * + * Archives all outstanding xlogs then returns + */ +static void +pgarch_ArchiverCopyLoop(void) +{ + char xlog[MAX_XFN_CHARS + 1]; + + /* + * loop through all xlogs with archive_status of .ready + * and archive them...mostly we expect this to be a single + * file, though it is possible some backend will add + * files onto the list of those that need archiving while we + * are still copying earlier archives + */ + while (pgarch_readyXlog(xlog)) + { + int failures = 0; + + for (;;) + { + if (pgarch_archiveXlog(xlog)) + { + /* successful */ + pgarch_archiveDone(xlog); + break; /* out of inner retry loop */ + } + else + { + if (++failures >= NUM_ARCHIVE_RETRIES) + { + ereport(WARNING, + (errmsg("transaction log file \"%s\" could not be archived", + xlog))); + return; /* give up archiving for now */ + } + sleep(1); /* wait a bit before retrying */ + } + } + } +} + +/* + * pgarch_archiveXlog + * + * Invokes system(3) to copy one archive file to wherever it should go + * + * Returns true if successful + */ +static bool +pgarch_archiveXlog(char *xlog) +{ + char xlogarchcmd[MAXPGPATH]; + char pathname[MAXPGPATH]; + char *dp; + char *endp; + const char *sp; + int rc; + + snprintf(pathname, MAXPGPATH, "%s/%s", XLogDir, xlog); + + /* + * construct the command to be executed + */ + dp = xlogarchcmd; + endp = xlogarchcmd + MAXPGPATH - 1; + *endp = '\0'; + + for (sp = XLogArchiveCommand; *sp; sp++) + { + if (*sp == '%') + { + switch (sp[1]) + { + case 'p': + /* %p: full path of source file */ + sp++; + StrNCpy(dp, pathname, endp-dp); + dp += strlen(dp); + break; + case 'f': + /* %f: filename of source file */ + sp++; + StrNCpy(dp, xlog, endp-dp); + dp += strlen(dp); + break; + case '%': + /* convert %% 
to a single % */ + sp++; + if (dp < endp) + *dp++ = *sp; + break; + default: + /* otherwise treat the % as not special */ + if (dp < endp) + *dp++ = *sp; + break; + } + } + else + { + if (dp < endp) + *dp++ = *sp; + } + } + *dp = '\0'; + + ereport(DEBUG3, + (errmsg_internal("executing archive command \"%s\"", + xlogarchcmd))); + rc = system(xlogarchcmd); + if (rc != 0) { + ereport(LOG, + (errmsg("archive command \"%s\" failed: return code %d", + xlogarchcmd, rc))); + return false; + } + ereport(LOG, + (errmsg("archived transaction log file \"%s\"", xlog))); + + return true; +} + +/* + * pgarch_readyXlog + * + * Return name of the oldest xlog file that has not yet been archived. + * No notification is set that file archiving is now in progress, so + * this would need to be extended if multiple concurrent archival + * tasks were created. If a failure occurs, we will completely + * re-copy the file at the next available opportunity. + * + * It is important that we return the oldest, so that we archive xlogs + * in order that they were written, for two reasons: + * 1) to maintain the sequential chain of xlogs required for recovery + * 2) because the oldest ones will sooner become candidates for + * recycling at time of checkpoint + */ +static bool +pgarch_readyXlog(char *xlog) +{ + /* + * open xlog status directory and read through list of + * xlogs that have the .ready suffix, looking for earliest file. + * It is possible to optimise this code, though only a single + * file is expected on the vast majority of calls, so.... 
+ */ + char newxlog[MAX_XFN_CHARS + 6 + 1]; + DIR *rldir; + struct dirent *rlde; + bool found = false; + + rldir = AllocateDir(XLogArchiveStatusDir); + if (rldir == NULL) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open archive status directory \"%s\": %m", + XLogArchiveStatusDir))); + + errno = 0; + while ((rlde = readdir(rldir)) != NULL) + { + int basenamelen = (int) strlen(rlde->d_name) - 6; + + if (basenamelen >= MIN_XFN_CHARS && + basenamelen <= MAX_XFN_CHARS && + strspn(rlde->d_name, VALID_XFN_CHARS) >= basenamelen && + strcmp(rlde->d_name + basenamelen, ".ready") == 0) + { + if (!found) { + strcpy(newxlog, rlde->d_name); + found = true; + } else { + if (strcmp(rlde->d_name, newxlog) < 0) + strcpy(newxlog, rlde->d_name); + } + } + + errno = 0; + } +#ifdef WIN32 + /* This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but + not in released version */ + if (GetLastError() == ERROR_NO_MORE_FILES) + errno = 0; +#endif + if (errno) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read archive status directory \"%s\": %m", + XLogArchiveStatusDir))); + FreeDir(rldir); + + if (found) + { + /* truncate off the .ready */ + newxlog[strlen(newxlog) - 6] = '\0'; + strcpy(xlog, newxlog); + } + return found; +} + +/* + * pgarch_archiveDone + * + * Emit notification that an xlog file has been successfully archived. + * We do this by renaming the status file from NNN.ready to NNN.done. + * Eventually, a checkpoint process will notice this and delete both the + * NNN.done file and the xlog file itself. 
+ */ +static void +pgarch_archiveDone(char *xlog) +{ + char rlogready[MAXPGPATH]; + char rlogdone[MAXPGPATH]; + int rc; + + snprintf(rlogready, MAXPGPATH, "%s/%s.ready", XLogArchiveStatusDir, xlog); + snprintf(rlogdone, MAXPGPATH, "%s/%s.done", XLogArchiveStatusDir, xlog); + rc = rename(rlogready, rlogdone); + if (rc < 0) + ereport(WARNING, + (errcode_for_file_access(), + errmsg("could not rename \"%s\": %m", + rlogready))); +} diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 7edd43ce6b..d08d02c2cf 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -37,7 +37,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.411 2004/07/12 19:14:56 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.412 2004/07/19 02:47:08 tgl Exp $ * * NOTES * @@ -104,6 +104,7 @@ #include "miscadmin.h" #include "nodes/nodes.h" #include "postmaster/postmaster.h" +#include "postmaster/pgarch.h" #include "storage/fd.h" #include "storage/ipc.h" #include "storage/pg_shmem.h" @@ -198,6 +199,7 @@ char *preload_libraries_string = NULL; /* PIDs of special child processes; 0 when not running */ static pid_t StartupPID = 0, BgWriterPID = 0, + PgArchPID = 0, PgStatPID = 0; /* Startup/shutdown state */ @@ -826,7 +828,8 @@ PostmasterMain(int argc, char *argv[]) * * CAUTION: when changing this list, check for side-effects on the signal * handling setup of child processes. See tcop/postgres.c, - * bootstrap/bootstrap.c, postmaster/bgwriter.c, and postmaster/pgstat.c. + * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/pgarch.c, + * and postmaster/pgstat.c. 
*/ pqinitmask(); PG_SETMASK(&BlockSig); @@ -1217,6 +1220,11 @@ ServerLoop(void) kill(BgWriterPID, SIGUSR2); } + /* If we have lost the archiver, try to start a new one */ + if (XLogArchivingActive() && PgArchPID == 0 && + StartupPID == 0 && !FatalError && Shutdown == NoShutdown) + PgArchPID = pgarch_start(); + /* If we have lost the stats collector, try to start a new one */ if (PgStatPID == 0 && StartupPID == 0 && !FatalError && Shutdown == NoShutdown) @@ -1760,6 +1768,8 @@ SIGHUP_handler(SIGNAL_ARGS) SignalChildren(SIGHUP); if (BgWriterPID != 0) kill(BgWriterPID, SIGHUP); + if (PgArchPID != 0) + kill(PgArchPID, SIGHUP); /* PgStatPID does not currently need SIGHUP */ load_hba(); load_ident(); @@ -1818,6 +1828,9 @@ pmdie(SIGNAL_ARGS) /* And tell it to shut down */ if (BgWriterPID != 0) kill(BgWriterPID, SIGUSR2); + /* Tell pgarch to shut down too; nothing left for it to do */ + if (PgArchPID != 0) + kill(PgArchPID, SIGQUIT); /* Tell pgstat to shut down too; nothing left for it to do */ if (PgStatPID != 0) kill(PgStatPID, SIGQUIT); @@ -1862,6 +1875,9 @@ pmdie(SIGNAL_ARGS) /* And tell it to shut down */ if (BgWriterPID != 0) kill(BgWriterPID, SIGUSR2); + /* Tell pgarch to shut down too; nothing left for it to do */ + if (PgArchPID != 0) + kill(PgArchPID, SIGQUIT); /* Tell pgstat to shut down too; nothing left for it to do */ if (PgStatPID != 0) kill(PgStatPID, SIGQUIT); @@ -1880,6 +1896,8 @@ pmdie(SIGNAL_ARGS) kill(StartupPID, SIGQUIT); if (BgWriterPID != 0) kill(BgWriterPID, SIGQUIT); + if (PgArchPID != 0) + kill(PgArchPID, SIGQUIT); if (PgStatPID != 0) kill(PgStatPID, SIGQUIT); if (DLGetHead(BackendList)) @@ -1967,12 +1985,16 @@ reaper(SIGNAL_ARGS) /* * Go to shutdown mode if a shutdown request was pending. - * Otherwise, try to start the stats collector too. + * Otherwise, try to start the archiver and stats collector too. 
*/ if (Shutdown > NoShutdown && BgWriterPID != 0) kill(BgWriterPID, SIGUSR2); - else if (PgStatPID == 0 && Shutdown == NoShutdown) - PgStatPID = pgstat_start(); + else if (Shutdown == NoShutdown) { + if (XLogArchivingActive() && PgArchPID == 0) + PgArchPID = pgarch_start(); + if (PgStatPID == 0) + PgStatPID = pgstat_start(); + } continue; } @@ -2004,6 +2026,23 @@ reaper(SIGNAL_ARGS) continue; } + /* + * Was it the archiver? If so, just try to start a new + * one; no need to force reset of the rest of the system. (If fail, + * we'll try again in future cycles of the main loop.) + */ + if (PgArchPID != 0 && pid == PgArchPID) + { + PgArchPID = 0; + if (exitstatus != 0) + LogChildExit(LOG, gettext("archiver process"), + pid, exitstatus); + if (XLogArchivingActive() && + StartupPID == 0 && !FatalError && Shutdown == NoShutdown) + PgArchPID = pgarch_start(); + continue; + } + /* * Was it the statistics collector? If so, just try to start a new * one; no need to force reset of the rest of the system. (If fail, @@ -2029,8 +2068,9 @@ reaper(SIGNAL_ARGS) if (FatalError) { /* - * Wait for all children exit, then reset shmem and - * StartupDataBase. + * Wait for all important children to exit, then reset shmem and + * StartupDataBase. (We can ignore the archiver and stats processes + * here since they are not connected to shmem.) */ if (DLGetHead(BackendList) || StartupPID != 0 || BgWriterPID != 0) goto reaper_done; @@ -2191,6 +2231,17 @@ HandleChildCrash(int pid, kill(BgWriterPID, (SendStop ? 
SIGSTOP : SIGQUIT)); } + /* Force a power-cycle of the pgarch process too */ + /* (Shouldn't be necessary, but just for luck) */ + if (PgArchPID != 0 && !FatalError) + { + ereport(DEBUG2, + (errmsg_internal("sending %s to process %d", + "SIGQUIT", + (int) PgArchPID))); + kill(PgArchPID, SIGQUIT); + } + /* Force a power-cycle of the pgstat processes too */ /* (Shouldn't be necessary, but just for luck) */ if (PgStatPID != 0 && !FatalError) @@ -2873,6 +2924,16 @@ SubPostmasterMain(int argc, char *argv[]) BootstrapMain(argc - 2, argv + 2); proc_exit(0); } + if (strcmp(argv[1], "-forkarch") == 0) + { + /* Close the postmaster's sockets */ + ClosePostmasterPorts(); + + /* Do not want to attach to shared memory */ + + PgArchiverMain(argc, argv); + proc_exit(0); + } if (strcmp(argv[1], "-forkbuf") == 0) { /* Close the postmaster's sockets */ @@ -2951,6 +3012,18 @@ sigusr1_handler(SIGNAL_ARGS) if (Shutdown <= SmartShutdown) SignalChildren(SIGUSR1); } + + if (PgArchPID != 0 && Shutdown == NoShutdown) + { + if (CheckPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER)) + { + /* + * Send SIGUSR1 to archiver process, to wake it up and begin + * archiving next transaction log file. + */ + kill(PgArchPID, SIGUSR1); + } + } PG_SETMASK(&UnBlockSig); diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index eb1866fff2..1ef19bb47c 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -10,7 +10,7 @@ * Written by Peter Eisentraut . 
* * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.219 2004/07/12 02:22:51 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.220 2004/07/19 02:47:10 tgl Exp $ * *-------------------------------------------------------------------- */ @@ -1371,6 +1371,15 @@ static struct config_real ConfigureNamesReal[] = static struct config_string ConfigureNamesString[] = { + { + {"archive_command", PGC_SIGHUP, WAL_SETTINGS, + gettext_noop("WAL archiving command."), + gettext_noop("The shell command that will be called to archive a WAL file.") + }, + &XLogArchiveCommand, + "", NULL, NULL + }, + { {"client_encoding", PGC_USERSET, CLIENT_CONN_LOCALE, gettext_noop("Sets the client's character set encoding."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index dcfbdf9242..9dc1ec8d83 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -111,6 +111,16 @@ #commit_delay = 0 # range 0-100000, in microseconds #commit_siblings = 5 # range 1-1000 +# - Archiving - + +#archive_command = '' # command to use to archive a logfile segment + +# If archive_command is '' then archiving is disabled. Otherwise, set it +# to a command to copy a file to the proper place. A simplistic example +# is 'cp %p /mnt/server/archivedir/%f'. Any %p in the string is replaced +# by the absolute path of the file to archive, while any %f is replaced by +# the file name only. NOTE: it is important for the command to return +# zero exit status if and only if it succeeded. #--------------------------------------------------------------------------- # QUERY TUNING diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index f947955557..ad71289ba2 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -39,7 +39,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * Portions taken from FreeBSD. 
* - * $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.43 2004/07/14 17:55:10 petere Exp $ + * $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.44 2004/07/19 02:47:12 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -145,7 +145,7 @@ static char *get_id(void); static char *get_encoding_id(char *encoding_name); static char *get_short_version(void); static int check_data_dir(void); -static bool mkdatadir(char *subdir); +static bool mkdatadir(const char *subdir); static void set_input(char **dest, char *filename); static void check_input(char *path); static void set_short_version(char *short_version, char *extrapath); @@ -900,7 +900,7 @@ check_data_dir(void) * make the data directory (or one of its subdirectories if subdir is not NULL) */ static bool -mkdatadir(char *subdir) +mkdatadir(const char *subdir) { char *path; @@ -2022,8 +2022,16 @@ main(int argc, char *argv[]) char *short_version; char *pgdenv; /* PGDATA value got from sent to * environment */ - char *subdirs[] = - {"global", "pg_xlog", "pg_clog", "pg_subtrans", "base", "base/1", "pg_tblspc"}; + static const char *subdirs[] = { + "global", + "pg_xlog", + "pg_xlog/archive_status", + "pg_clog", + "pg_subtrans", + "base", + "base/1", + "pg_tblspc" + }; progname = get_progname(argv[0]); set_pglocale_pgservice(argv[0], "initdb"); diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 1c29ab0762..630a62d77a 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.52 2004/07/01 00:51:38 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.53 2004/07/19 02:47:13 tgl Exp $ */ #ifndef XLOG_H #define XLOG_H @@ -208,9 +208,12 @@ extern XLogRecPtr ProcLastRecEnd; /* these variables are GUC parameters related to XLOG */ extern int 
CheckPointSegments; extern int XLOGbuffers; +extern char *XLogArchiveCommand; extern char *XLOG_sync_method; extern const char XLOG_sync_method_default[]; +#define XLogArchivingActive() (XLogArchiveCommand[0] != '\0') + #ifdef WAL_DEBUG extern bool XLOG_DEBUG; #endif diff --git a/src/include/postmaster/pgarch.h b/src/include/postmaster/pgarch.h new file mode 100644 index 0000000000..0ce9e15781 --- /dev/null +++ b/src/include/postmaster/pgarch.h @@ -0,0 +1,26 @@ +/*------------------------------------------------------------------------- + * + * pgarch.h + * Exports from postmaster/pgarch.c. + * + * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $PostgreSQL: pgsql/src/include/postmaster/pgarch.h,v 1.1 2004/07/19 02:47:16 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#ifndef _PGARCH_H +#define _PGARCH_H + +/* ---------- + * Functions called from postmaster + * ---------- + */ +extern int pgarch_start(void); + +#ifdef EXEC_BACKEND +extern void PgArchiverMain(int argc, char *argv[]); +#endif + +#endif /* _PGARCH_H */ diff --git a/src/include/storage/pmsignal.h b/src/include/storage/pmsignal.h index 4180c95c73..808e5013c0 100644 --- a/src/include/storage/pmsignal.h +++ b/src/include/storage/pmsignal.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/pmsignal.h,v 1.8 2004/05/29 22:48:23 tgl Exp $ + * $PostgreSQL: pgsql/src/include/storage/pmsignal.h,v 1.9 2004/07/19 02:47:15 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -24,6 +24,7 @@ typedef enum { PMSIGNAL_PASSWORD_CHANGE, /* pg_pwd file has changed */ PMSIGNAL_WAKEN_CHILDREN, /* send a SIGUSR1 signal to all backends */ + PMSIGNAL_WAKEN_ARCHIVER, /* send a NOTIFY 
signal to xlog archiver */ NUM_PMSIGNALS /* Must be last value of enum! */ } PMSignalReason;