* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.147 2004/07/01 00:49:50 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.148 2004/07/19 02:47:05 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
+#include <ctype.h>
#include <fcntl.h>
#include <signal.h>
#include <time.h>
#include "storage/bufpage.h"
#include "storage/fd.h"
#include "storage/lwlock.h"
+#include "storage/pmsignal.h"
#include "storage/proc.h"
#include "storage/sinval.h"
#include "storage/spin.h"
/* User-settable parameters */
int CheckPointSegments = 3;
int XLOGbuffers = 8;
+char *XLogArchiveCommand = NULL;
char *XLOG_sync_method = NULL;
const char XLOG_sync_method_default[] = DEFAULT_SYNC_METHOD_STR;
-char XLOG_archive_dir[MAXPGPATH]; /* null string means
- * delete 'em */
#ifdef WAL_DEBUG
bool XLOG_DEBUG = false;
*/
StartUpID ThisStartUpID = 0;
-/* Are we doing recovery by reading XLOG? */
+/* Are we doing recovery from XLOG? */
bool InRecovery = false;
+/* Are we recovering using offline XLOG archives? */
+static bool InArchiveRecovery = false;
+/* Was the last file restored from archive, or local? */
+static bool restoredFromArchive = false;
+
+static char recoveryRestoreCommand[MAXPGPATH];
+static bool recoveryTarget = false;
+static bool recoveryTargetExact = false;
+static bool recoveryTargetInclusive = true;
+static TransactionId recoveryTargetXid;
+static time_t recoveryTargetTime;
/*
* MyLastRecPtr points to the start of the last XLOG record inserted by the
((xlrp).xrecoff - 1) / XLogSegSize == (logSeg))
-#define XLogFileName(path, log, seg) \
- snprintf(path, MAXPGPATH, "%s/%08X%08X", \
- XLogDir, log, seg)
-
#define PrevBufIdx(idx) \
(((idx) == 0) ? XLogCtl->XLogCacheBlck : ((idx) - 1))
((xrecoff) % BLCKSZ >= SizeOfXLogPHD && \
(BLCKSZ - (xrecoff) % BLCKSZ) >= SizeOfXLogRecord)
+/*
+ * These macros encapsulate knowledge about the exact layout of XLog file
+ * names as well as archive-status file names.
+ *
+ * MAXFNAMELEN is the buffer size used for a bare segment file name.
+ *
+ * XLogFileName writes the bare segment name (log and seg, each printed as
+ * eight upper-case hex digits) into fname, bounded by MAXFNAMELEN.
+ *
+ * XLogFilePath writes the same name prefixed with XLogDir and a slash into
+ * path, bounded by MAXPGPATH.
+ *
+ * StatusFilePath writes XLogDir/archive_status/<xlog><suffix> into path,
+ * bounded by MAXPGPATH; xlog is a bare segment name and suffix is a string
+ * such as ".ready" or ".done".
+ *
+ * All three expand to a snprintf() call, so over-long results are truncated
+ * rather than overflowing the destination.
+ */
+#define MAXFNAMELEN 32
+
+#define XLogFileName(fname, log, seg) \
+ snprintf(fname, MAXFNAMELEN, "%08X%08X", log, seg)
+
+#define XLogFilePath(path, log, seg) \
+ snprintf(path, MAXPGPATH, "%s/%08X%08X", XLogDir, log, seg)
+
+#define StatusFilePath(path, xlog, suffix) \
+ snprintf(path, MAXPGPATH, "%s/archive_status/%s%s", XLogDir, xlog, suffix)
+
/*
* _INTL_MAXLOGRECSZ: max space needed for a record including header and
* any backup-block data.
static bool InRedo = false;
+static void XLogArchiveNotify(const char *xlog);
+static void XLogArchiveNotifySeg(uint32 log, uint32 seg);
+static bool XLogArchiveIsDone(const char *xlog);
+static void XLogArchiveCleanup(const char *xlog);
+static void readRecoveryCommandFile(void);
+static void exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg,
+ uint32 xrecoff);
+static bool recoveryStopsHere(XLogRecord *record, bool *includeThis);
static bool AdvanceXLInsertBuffer(void);
static bool WasteXLInsertBuffer(void);
bool find_free, int max_advance,
bool use_lock);
static int XLogFileOpen(uint32 log, uint32 seg, bool econt);
+static void RestoreArchivedXLog(char *path, uint32 log, uint32 seg);
static void PreallocXlogFiles(XLogRecPtr endptr);
static void MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr);
static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer);
return (RecPtr);
}
+/*
+ * XLogArchiveNotify
+ *
+ * Tell the archiver that a segment is ready by dropping a marker file.
+ *
+ * The marker's name carries the whole message: for segment
+ * 00000001000000C6 we create 00000001000000C6.ready, the archiver then
+ * knows to archive XLogDir/00000001000000C6, and when it is finished it
+ * renames the marker to 00000001000000C6.done.
+ *
+ * A failure to create or close the marker is only logged; in that case
+ * the archiver is not signalled.
+ */
+static void
+XLogArchiveNotify(const char *xlog)
+{
+	char		readyPath[MAXPGPATH];
+	FILE	   *fp;
+
+	/* the marker is an otherwise empty file called <XLOG>.ready */
+	StatusFilePath(readyPath, xlog, ".ready");
+	fp = AllocateFile(readyPath, "w");
+
+	if (fp == NULL)
+	{
+		ereport(LOG,
+				(errcode_for_file_access(),
+				 errmsg("could not create archive status file \"%s\": %m",
+						readyPath)));
+	}
+	else if (FreeFile(fp) != 0)
+	{
+		ereport(LOG,
+				(errcode_for_file_access(),
+				 errmsg("could not write archive status file \"%s\": %m",
+						readyPath)));
+	}
+	else if (IsUnderPostmaster)
+	{
+		/* marker is safely on disk: wake the archiver up */
+		SendPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER);
+	}
+}
+
+/*
+ * XLogArchiveNotifySeg
+ *
+ * Same as XLogArchiveNotify, but the segment is identified by its
+ * log/seg numbers instead of by file name.
+ */
+static void
+XLogArchiveNotifySeg(uint32 log, uint32 seg)
+{
+	char		segName[MAXFNAMELEN];
+
+	/* turn the numeric id into the on-disk file name, then notify */
+	XLogFileName(segName, log, seg);
+	XLogArchiveNotify(segName);
+}
+
+/*
+ * XLogArchiveIsDone
+ *
+ * Decide whether an old XLOG segment may be deleted or recycled, based on
+ * its archive status files.  Returns true when that is okay.
+ *
+ *	<XLOG>.done exists		-> true
+ *	<XLOG>.ready exists		-> false
+ *	neither exists			-> create <XLOG>.ready and return false
+ *
+ * The last case covers the possibility that the original attempt to
+ * create <XLOG>.ready failed.
+ */
+static bool
+XLogArchiveIsDone(const char *xlog)
+{
+	char		donePath[MAXPGPATH];
+	char		readyPath[MAXPGPATH];
+	struct stat st;
+
+	StatusFilePath(donePath, xlog, ".done");
+	StatusFilePath(readyPath, xlog, ".ready");
+
+	/* the expected case: archiver has already finished with it */
+	if (stat(donePath, &st) == 0)
+		return true;
+
+	if (stat(readyPath, &st) != 0)
+	{
+		/*
+		 * No .ready either.  The archiver might have renamed .ready to
+		 * .done between our two checks, so look for .done once more.
+		 */
+		if (stat(donePath, &st) == 0)
+			return true;
+
+		/* really nothing there: retry creation of the .ready file */
+		XLogArchiveNotify(xlog);
+	}
+
+	/* either the archiver is still busy with it, or we just (re)queued it */
+	return false;
+}
+
+/*
+ * XLogArchiveCleanup
+ *
+ * Remove the ".done" archive status file belonging to the given xlog
+ * segment.  The result of the removal is not examined.
+ */
+static void
+XLogArchiveCleanup(const char *xlog)
+{
+	char		donePath[MAXPGPATH];
+
+	StatusFilePath(donePath, xlog, ".done");
+
+	/* should we complain about failure? */
+	unlink(donePath);
+}
+
/*
* Advance the Insert state to the next buffer page, writing out the next
* buffer if it still contains unwritten data.
* and re-open prior segments when an fsync request comes along
* later. Doing it here ensures that one and only one backend will
* perform this fsync.
+ *
+ * This is also the right place to notify the Archiver that the
+ * segment is ready to copy to archival storage.
*/
if (openLogOff >= XLogSegSize && !ispartialpage)
{
issue_xlog_fsync();
LogwrtResult.Flush = LogwrtResult.Write; /* end of current page */
+
+ if (XLogArchivingActive())
+ XLogArchiveNotifySeg(openLogId, openLogSeg);
}
if (ispartialpage)
int fd;
int nbytes;
- XLogFileName(path, log, seg);
+ XLogFilePath(path, log, seg);
/*
* Try to use existent file (checkpoint maker may have created it
char path[MAXPGPATH];
struct stat stat_buf;
- XLogFileName(path, log, seg);
+ XLogFilePath(path, log, seg);
/*
* We want to be sure that only one process does this at a time.
return false;
}
NextLogSeg(log, seg);
- XLogFileName(path, log, seg);
+ XLogFilePath(path, log, seg);
}
}
char path[MAXPGPATH];
int fd;
- XLogFileName(path, log, seg);
+ if (InArchiveRecovery)
+ RestoreArchivedXLog(path, log, seg);
+ else
+ XLogFilePath(path, log, seg);
fd = BasicOpenFile(path, O_RDWR | PG_BINARY | XLOG_SYNC_BIT,
S_IRUSR | S_IWUSR);
path, log, seg)));
}
+ /*
+ * XXX this is a pretty horrid hack. Remove after implementing timelines.
+ *
+ * if we switched back to local xlogs after having been
+ * restoring from archive, we need to make sure that the
+ * local files don't get removed by end-of-recovery checkpoint
+ * in case we need to re-run the recovery
+ *
+ * we want to copy these away as soon as possible, so set
+ * the archive status flag to .ready for them
+ * in case admin isn't cautious enough to have done this anyway
+ *
+ * XXX this is completely broken, because there is no guarantee this file
+ * is actually complete and ready to be archived. Also, what if there's
+ * a .done file for them?
+ */
+ if (InArchiveRecovery && !restoredFromArchive)
+ XLogArchiveNotifySeg(log, seg);
+
return (fd);
}
+/*
+ * Get next logfile segment when using off-line archive for recovery
+ *
+ * Attempt to retrieve the specified segment from off-line archival storage.
+ * If successful, fill "path" with its complete path (note that this will be
+ * a temp file name that doesn't follow the normal naming convention).
+ *
+ * If not successful, fill "path" with the name of the normal on-line segment
+ * file (which may or may not actually exist, but we'll try to use it).
+ *
+ * path: output buffer; it is written with strcpy() from a MAXPGPATH-sized
+ * buffer or with XLogFilePath(), so it must hold at least MAXPGPATH bytes.
+ * log, seg: identify the wanted segment.
+ *
+ * Side effect: restoredFromArchive is set to true when the archived copy
+ * is used and to false when we fall back to the file in XLogDir.
+ *
+ * Reports FATAL if the temp file cannot be examined or removed beforehand,
+ * or if stat() on it fails with something other than ENOENT after a
+ * restore command that returned zero.
+ */
+static void
+RestoreArchivedXLog(char *path, uint32 log, uint32 seg)
+{
+ char xlogfname[MAXFNAMELEN];
+ char xlogpath[MAXPGPATH];
+ char xlogRestoreCmd[MAXPGPATH];
+ char *dp;
+ char *endp;
+ const char *sp;
+ int rc;
+ struct stat stat_buf;
+
+ /*
+ * When doing archive recovery, we always prefer an archived log file
+ * even if a file of the same name exists in XLogDir. The reason is
+ * that the file in XLogDir could be an old, un-filled or partly-filled
+ * version that was copied and restored as part of backing up $PGDATA.
+ *
+ * We could try to optimize this slightly by checking the local
+ * copy lastchange timestamp against the archived copy,
+ * but we have no API to do this, nor can we guarantee that the
+ * lastchange timestamp was preserved correctly when we copied
+ * to archive. Our aim is robustness, so we elect not to do this.
+ *
+ * If we cannot obtain the log file from the archive, however, we
+ * will try to use the XLogDir file if it exists. This is so that
+ * we can make use of log segments that weren't yet transferred to
+ * the archive.
+ *
+ * Notice that we don't actually overwrite any files when we copy back
+ * from archive because the recoveryRestoreCommand may inadvertently
+ * restore inappropriate xlogs, or they may be corrupt, so we may
+ * wish to fallback to the segments remaining in current XLogDir later.
+ * The copy-from-archive filename is always the same, ensuring that we
+ * don't run out of disk space on long recoveries.
+ */
+ XLogFileName(xlogfname, log, seg);
+ snprintf(xlogpath, MAXPGPATH, "%s/RECOVERYXLOG", XLogDir);
+
+ /*
+ * Make sure there is no existing RECOVERYXLOG file.
+ *
+ * A leftover file would make the "did the restore command produce
+ * anything?" stat() check further down meaningless.
+ */
+ if (stat(xlogpath, &stat_buf) != 0)
+ {
+ if (errno != ENOENT)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not stat \"%s\": %m",
+ xlogpath)));
+ }
+ else
+ {
+ if (unlink(xlogpath) != 0)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not remove \"%s\": %m",
+ xlogpath)));
+ }
+
+ /*
+ * construct the command to be executed
+ *
+ * recoveryRestoreCommand is copied into xlogRestoreCmd with %p replaced
+ * by xlogpath, %f replaced by xlogfname, and %% replaced by a single %.
+ * A % followed by anything else is copied through unchanged. Output
+ * never goes past endp (the last byte of the buffer); text that does
+ * not fit is dropped without any error being raised.
+ */
+ dp = xlogRestoreCmd;
+ endp = xlogRestoreCmd + MAXPGPATH - 1;
+ *endp = '\0';
+
+ for (sp = recoveryRestoreCommand; *sp; sp++)
+ {
+ if (*sp == '%')
+ {
+ switch (sp[1])
+ {
+ case 'p':
+ /* %p: full path of target file */
+ sp++;
+ StrNCpy(dp, xlogpath, endp-dp);
+ dp += strlen(dp);
+ break;
+ case 'f':
+ /* %f: filename of desired file */
+ sp++;
+ StrNCpy(dp, xlogfname, endp-dp);
+ dp += strlen(dp);
+ break;
+ case '%':
+ /* convert %% to a single % */
+ sp++;
+ if (dp < endp)
+ *dp++ = *sp;
+ break;
+ default:
+ /* otherwise treat the % as not special */
+ if (dp < endp)
+ *dp++ = *sp;
+ break;
+ }
+ }
+ else
+ {
+ if (dp < endp)
+ *dp++ = *sp;
+ }
+ }
+ *dp = '\0';
+
+ ereport(DEBUG3,
+ (errmsg_internal("executing restore command \"%s\"",
+ xlogRestoreCmd)));
+
+ /*
+ * Copy xlog from archival storage to XLogDir
+ */
+ rc = system(xlogRestoreCmd);
+ if (rc == 0)
+ {
+ /* restore success ... assuming file is really there now ... */
+ if (stat(xlogpath, &stat_buf) == 0) {
+ ereport(LOG,
+ (errmsg("restored log file \"%s\" from archive",
+ xlogfname)));
+ strcpy(path, xlogpath);
+ restoredFromArchive = true;
+ return;
+ }
+ if (errno != ENOENT)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not stat \"%s\": %m",
+ xlogpath)));
+ }
+
+ /*
+ * remember, we rollforward UNTIL the restore fails
+ * so failure here is just part of the process...
+ * that makes it difficult to determine whether the restore
+ * failed because there isn't an archive to restore, or
+ * because the administrator has specified the restore
+ * program incorrectly. We have to assume the former.
+ *
+ * We also get here when the command returned zero but left no file
+ * behind; rc is the raw system() result in either case.
+ */
+ ereport(DEBUG1,
+ (errmsg("could not restore \"%s\" from archive: return code %d",
+ xlogfname, rc)));
+
+ /*
+ * if an archived file is not available, there might still be a version
+ * of this file in XLogDir, so return that as the filename to open.
+ *
+ * In many recovery scenarios we expect this to fail also, but
+ * if so that just means we've reached the end of WAL.
+ */
+ XLogFilePath(path, log, seg);
+ restoredFromArchive = false;
+}
+
/*
* Preallocate log files beyond the specified log endpoint, according to
* the XLOGfile user parameter.
uint32 endlogSeg;
DIR *xldir;
struct dirent *xlde;
- char lastoff[32];
+ char lastoff[MAXFNAMELEN];
char path[MAXPGPATH];
XLByteToPrevSeg(endptr, endlogId, endlogSeg);
errmsg("could not open transaction log directory \"%s\": %m",
XLogDir)));
- sprintf(lastoff, "%08X%08X", log, seg);
+ XLogFileName(lastoff, log, seg);
errno = 0;
while ((xlde = readdir(xldir)) != NULL)
{
+ /*
+ * use the alphanumeric sorting property of the filenames to decide
+ * which ones are earlier than the lastoff segment
+ */
if (strlen(xlde->d_name) == 16 &&
strspn(xlde->d_name, "0123456789ABCDEF") == 16 &&
strcmp(xlde->d_name, lastoff) <= 0)
{
- snprintf(path, MAXPGPATH, "%s/%s", XLogDir, xlde->d_name);
- if (XLOG_archive_dir[0])
- {
- ereport(LOG,
- (errmsg("archiving transaction log file \"%s\"",
- xlde->d_name)));
- elog(WARNING, "archiving log files is not implemented");
- }
- else
+ bool recycle;
+
+ if (XLogArchivingActive())
+ recycle = XLogArchiveIsDone(xlde->d_name);
+ else
+ recycle = true;
+
+ if (recycle)
{
+ snprintf(path, MAXPGPATH, "%s/%s", XLogDir, xlde->d_name);
+
/*
* Before deleting the file, see if it can be recycled as
* a future log segment. We allow recycling segments up
{
/* No need for any more future segments... */
ereport(LOG,
- (errmsg("removing transaction log file \"%s\"",
- xlde->d_name)));
+ (errmsg("removing transaction log file \"%s\"",
+ xlde->d_name)));
unlink(path);
}
+
+ XLogArchiveCleanup(xlde->d_name);
}
}
errno = 0;
return buf;
}
+/*
+ * See if there is a recovery command file (recovery.conf), and if so
+ * read in parameters for archive recovery.
+ *
+ * Every line that is not blank and does not start with '#' must have the
+ * form
+ *		parameter = 'value'
+ * The recognized parameters are restore_command (required),
+ * recovery_target_xid, recovery_target_time and recovery_target_inclusive.
+ * When both an xid and a time target are given, the xid target wins
+ * regardless of the order of the lines.
+ *
+ * If the file does not exist we return silently and archive recovery is
+ * not entered.  Otherwise the recovery* state variables are filled in and
+ * InArchiveRecovery is set to true.  Any problem with the file (cannot be
+ * opened, syntax error, unknown parameter, invalid value, missing
+ * restore_command) is reported as FATAL.
+ *
+ * XXX longer term intention is to expand this to
+ * cater for additional parameters and controls
+ * possibly use a flex lexer similar to the GUC one
+ */
+static void
+readRecoveryCommandFile(void)
+{
+	char		recoveryCommandFile[MAXPGPATH];
+	FILE	   *fd;
+	char		cmdline[MAXPGPATH];
+	bool		syntaxError = false;
+
+	snprintf(recoveryCommandFile, MAXPGPATH, "%s/recovery.conf", DataDir);
+	fd = AllocateFile(recoveryCommandFile, "r");
+	if (fd == NULL)
+	{
+		if (errno == ENOENT)
+			return;				/* not there, so no archive recovery */
+		ereport(FATAL,
+				(errcode_for_file_access(),
+				 errmsg("could not open recovery command file \"%s\": %m",
+						recoveryCommandFile)));
+	}
+
+	ereport(LOG,
+			(errmsg("starting archive recovery")));
+
+	/*
+	 * Parse the file...
+	 */
+	while (fgets(cmdline, MAXPGPATH, fd) != NULL)
+	{
+		/* skip leading whitespace and check for # comment */
+		char	   *ptr;
+		char	   *tok1;
+		char	   *tok2;
+
+		for (ptr = cmdline; *ptr; ptr++)
+		{
+			if (!isspace((unsigned char) *ptr))
+				break;
+		}
+		if (*ptr == '\0' || *ptr == '#')
+			continue;
+
+		/* identify the quoted parameter value */
+		tok1 = strtok(ptr, "'");
+		if (!tok1)
+		{
+			syntaxError = true;
+			break;
+		}
+		tok2 = strtok(NULL, "'");
+		if (!tok2)
+		{
+			syntaxError = true;
+			break;
+		}
+		/* reparse to get just the parameter name */
+		tok1 = strtok(ptr, " \t=");
+		if (!tok1)
+		{
+			syntaxError = true;
+			break;
+		}
+
+		if (strcmp(tok1, "restore_command") == 0)
+		{
+			StrNCpy(recoveryRestoreCommand, tok2, MAXPGPATH);
+			ereport(LOG,
+					(errmsg("restore_command = \"%s\"",
+							recoveryRestoreCommand)));
+		}
+		else if (strcmp(tok1, "recovery_target_xid") == 0)
+		{
+			char	   *endptr;
+			unsigned long xidval;
+			bool		xidOK;
+
+			errno = 0;
+			xidval = strtoul(tok2, &endptr, 0);
+
+			/*
+			 * strtoul() reports "no digits at all" only by leaving endptr
+			 * at the start of the string, so errno alone is not enough.
+			 * Also insist that nothing but whitespace follows the number
+			 * and that the value survives the cast to TransactionId.
+			 */
+			xidOK = (errno != EINVAL && errno != ERANGE && endptr != tok2);
+			while (xidOK && isspace((unsigned char) *endptr))
+				endptr++;
+			if (xidOK && *endptr != '\0')
+				xidOK = false;
+			if (xidOK && (unsigned long) ((TransactionId) xidval) != xidval)
+				xidOK = false;
+
+			if (!xidOK)
+				ereport(FATAL,
+						(errmsg("recovery_target_xid is not a valid number: \"%s\"",
+								tok2)));
+			recoveryTargetXid = (TransactionId) xidval;
+			ereport(LOG,
+					(errmsg("recovery_target_xid = %u",
+							recoveryTargetXid)));
+			recoveryTarget = true;
+			recoveryTargetExact = true;
+		}
+		else if (strcmp(tok1, "recovery_target_time") == 0)
+		{
+			struct tm	tm;
+
+			/*
+			 * if recovery_target_xid specified, then this overrides
+			 * recovery_target_time
+			 */
+			if (recoveryTargetExact)
+				continue;
+			recoveryTarget = true;
+			recoveryTargetExact = false;
+
+			/*
+			 * convert the time string given
+			 * by the user to the time_t format.
+			 *
+			 * strptime() fills in only the fields named in the format, so
+			 * start from a cleared struct, and set tm_isdst to -1 so that
+			 * mktime() works out for itself whether DST applies instead of
+			 * acting on an uninitialized flag.
+			 */
+			MemSet(&tm, 0, sizeof(tm));
+			tm.tm_isdst = -1;
+			if (strptime(tok2, "%Y-%m-%d %H:%M:%S", &tm) == NULL)
+				ereport(FATAL,
+						(errmsg("invalid recovery_target_time \"%s\"",
+								tok2),
+						 errhint("Correct format is YYYY-MM-DD hh:mm:ss.")));
+			recoveryTargetTime = mktime(&tm);
+			if (recoveryTargetTime == (time_t) -1)
+				ereport(FATAL,
+						(errmsg("invalid recovery_target_time \"%s\"",
+								tok2),
+						 errhint("Correct format is YYYY-MM-DD hh:mm:ss.")));
+			ereport(LOG,
+					(errmsg("recovery_target_time = %s",
+							tok2)));
+		}
+		else if (strcmp(tok1, "recovery_target_inclusive") == 0)
+		{
+			/*
+			 * does nothing if a recovery_target is not also set
+			 *
+			 * anything other than the exact string "true" means false
+			 */
+			if (strcmp(tok2, "true") == 0)
+				recoveryTargetInclusive = true;
+			else
+			{
+				recoveryTargetInclusive = false;
+				tok2 = "false";
+			}
+			ereport(LOG,
+					(errmsg("recovery_target_inclusive = %s", tok2)));
+		}
+		else
+			ereport(FATAL,
+					(errmsg("unrecognized recovery parameter \"%s\"",
+							tok1)));
+	}
+
+	FreeFile(fd);
+
+	/* cmdline still holds the offending line if we broke out of the loop */
+	if (syntaxError)
+		ereport(FATAL,
+				(errmsg("syntax error in recovery command file: %s",
+						cmdline),
+				 errhint("Lines should have the format parameter = 'value'.")));
+
+	/* Check that required parameters were supplied */
+	if (recoveryRestoreCommand[0] == '\0')
+		ereport(FATAL,
+				(errmsg("recovery command file \"%s\" did not specify restore_command",
+						recoveryCommandFile)));
+
+	/*
+	 * clearly indicate our state
+	 */
+	InArchiveRecovery = true;
+}
+
+/*
+ * Exit archive-recovery state
+ *
+ * endLogId, endLogSeg: the segment in which the replayed WAL ends; the
+ * caller is expected to still have it open as readFile (asserted below).
+ * xrecoff: byte offset of the end of valid WAL; it is rounded up to the
+ * next page and reduced modulo XLogSegSize before use.
+ *
+ * Clears InArchiveRecovery, closes readFile, moves a segment restored from
+ * archive into its regular place (or removes a stray RECOVERYXLOG), zeroes
+ * the rest of the segment when a recovery target was in use, and finally
+ * renames recovery.conf to recovery.done. Rename failures are FATAL;
+ * failures while zeroing the segment are PANIC.
+ */
+static void
+exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, uint32 xrecoff)
+{
+ char recoveryPath[MAXPGPATH];
+ char xlogpath[MAXPGPATH];
+ char recoveryCommandFile[MAXPGPATH];
+ char recoveryCommandDone[MAXPGPATH];
+
+ /*
+ * Disable fetches from archive, so we can use XLogFileOpen below.
+ */
+ InArchiveRecovery = false;
+
+ /*
+ * We should have the ending log segment currently open. Verify,
+ * and then close it (to avoid problems on Windows with trying to
+ * rename or delete an open file).
+ */
+ Assert(readFile >= 0);
+ Assert(readId == endLogId);
+ Assert(readSeg == endLogSeg);
+
+ close(readFile);
+ readFile = -1;
+
+ /*
+ * If the segment was fetched from archival storage, we want to replace
+ * the existing xlog segment (if any) with the archival version. This
+ * is because whatever is in XLogDir is very possibly older than what
+ * we have from the archives, since it could have come from restoring
+ * a PGDATA backup. In any case, the archival version certainly is
+ * more descriptive of what our current database state is, because that
+ * is what we replayed from.
+ *
+ * XXX there ought to be a timeline increment somewhere around here.
+ */
+ snprintf(recoveryPath, MAXPGPATH, "%s/RECOVERYXLOG", XLogDir);
+ XLogFilePath(xlogpath, endLogId, endLogSeg);
+
+ if (restoredFromArchive)
+ {
+ ereport(DEBUG3,
+ (errmsg_internal("moving last restored xlog to \"%s\"",
+ xlogpath)));
+ unlink(xlogpath); /* might or might not exist */
+ if (rename(recoveryPath, xlogpath) != 0)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not rename \"%s\" to \"%s\": %m",
+ recoveryPath, xlogpath)));
+ /* XXX might we need to fix permissions on the file? */
+ }
+ else
+ {
+ /*
+ * If the latest segment is not archival, but there's still a
+ * RECOVERYXLOG lying about, get rid of it.
+ */
+ unlink(recoveryPath); /* ignore any error */
+ }
+
+ /*
+ * If we restored to a point-in-time, then the current WAL segment
+ * probably contains records beyond the stop point. These represent an
+ * extreme hazard: if we crash in the near future, the replay apparatus
+ * will know no reason why it shouldn't replay them. Therefore,
+ * explicitly zero out all the remaining pages of the segment. (We need
+ * not worry about the partial page in which the last record ends, since
+ * StartUpXlog will handle zeroing that. Also, there's nothing to do
+ * if we are right at a segment boundary.)
+ *
+ * XXX segment files beyond the current one also represent a hazard
+ * for the same reason. Need to invent timelines to fix this.
+ */
+
+ /* align xrecoff to next page, then drop segment part */
+ if (xrecoff % BLCKSZ != 0)
+ xrecoff += (BLCKSZ - xrecoff % BLCKSZ);
+ xrecoff %= XLogSegSize;
+
+ if (recoveryTarget && xrecoff != 0)
+ {
+ int fd;
+ char zbuffer[BLCKSZ];
+
+ fd = XLogFileOpen(endLogId, endLogSeg, false);
+ MemSet(zbuffer, 0, sizeof(zbuffer));
+ if (lseek(fd, (off_t) xrecoff, SEEK_SET) < 0)
+ ereport(PANIC,
+ (errcode_for_file_access(),
+ errmsg("could not seek in file \"%s\": %m",
+ xlogpath)));
+ for (; xrecoff < XLogSegSize; xrecoff += sizeof(zbuffer)) /* one zeroed page per iteration */
+ {
+ errno = 0;
+ if ((int) write(fd, zbuffer, sizeof(zbuffer)) != (int) sizeof(zbuffer))
+ {
+ /* if write didn't set errno, assume problem is no disk space */
+ if (errno == 0)
+ errno = ENOSPC;
+ ereport(PANIC,
+ (errcode_for_file_access(),
+ errmsg("could not write to file \"%s\": %m", xlogpath)));
+ }
+ }
+ if (pg_fsync(fd) != 0)
+ ereport(PANIC,
+ (errcode_for_file_access(),
+ errmsg("could not fsync file \"%s\": %m", xlogpath)));
+ if (close(fd))
+ ereport(PANIC,
+ (errcode_for_file_access(),
+ errmsg("could not close file \"%s\": %m", xlogpath)));
+ }
+
+ /*
+ * Rename the config file out of the way, so that we don't accidentally
+ * re-enter archive recovery mode in a subsequent crash.
+ */
+ snprintf(recoveryCommandFile, MAXPGPATH, "%s/recovery.conf", DataDir);
+ snprintf(recoveryCommandDone, MAXPGPATH, "%s/recovery.done", DataDir);
+ unlink(recoveryCommandDone); /* drop any recovery.done left from an earlier run; result ignored */
+ if (rename(recoveryCommandFile, recoveryCommandDone) != 0)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not rename \"%s\" to \"%s\": %m",
+ recoveryCommandFile, recoveryCommandDone)));
+
+ ereport(LOG,
+ (errmsg("archive recovery complete")));
+}
+
+/*
+ * recoveryStopsHere
+ *
+ * Point-in-time recovery check: should replay stop at, or just after,
+ * the given XLOG record?
+ *
+ * Returns TRUE if we are stopping, FALSE otherwise.  *includeThis is
+ * written only on a TRUE return: TRUE there means the record must still
+ * be applied before stopping, FALSE means stop without applying it.
+ */
+static bool
+recoveryStopsHere(XLogRecord *record, bool *includeThis)
+{
+	uint8		xactInfo;
+	time_t		endTime;
+	bool		isCommit;
+	bool		stop;
+
+	/* Nothing to do without a PITR target, or for non-xact records */
+	if (!recoveryTarget || record->xl_rmid != RM_XACT_ID)
+		return false;
+
+	/* Only COMMIT and ABORT records are candidate stop points */
+	xactInfo = record->xl_info & ~XLR_INFO_MASK;
+	switch (xactInfo)
+	{
+		case XLOG_XACT_COMMIT:
+			endTime = ((xl_xact_commit *) XLogRecGetData(record))->xtime;
+			isCommit = true;
+			break;
+		case XLOG_XACT_ABORT:
+			endTime = ((xl_xact_abort *) XLogRecGetData(record))->xtime;
+			isCommit = false;
+			break;
+		default:
+			return false;
+	}
+
+	if (recoveryTargetExact)
+	{
+		/*
+		 * Xid target.  Only one transaction end record can carry this
+		 * exact xid, and the test MUST be for equality: xids are handed
+		 * out in start order, not completion order, so a higher-numbered
+		 * transaction frequently finishes first.
+		 */
+		stop = (record->xl_xid == recoveryTargetXid);
+	}
+	else if (recoveryTargetInclusive)
+	{
+		/*
+		 * Time target, inclusive.  Many transactions may share one commit
+		 * time, so keep going until the first record past the target.
+		 */
+		stop = (endTime > recoveryTargetTime);
+	}
+	else
+	{
+		/* Time target, exclusive: stop at the first record that reaches it */
+		stop = (endTime >= recoveryTargetTime);
+	}
+
+	if (!stop)
+		return false;
+
+	/*
+	 * For an xid target the user decides whether the matching record is
+	 * applied; for a time target the record that trips the test never is.
+	 */
+	*includeThis = recoveryTargetExact ? recoveryTargetInclusive : false;
+
+	if (isCommit)
+	{
+		if (*includeThis)
+			ereport(LOG,
+					(errmsg("recovery stopping after commit of transaction %u, time %s",
+							record->xl_xid, str_time(endTime))));
+		else
+			ereport(LOG,
+					(errmsg("recovery stopping before commit of transaction %u, time %s",
+							record->xl_xid, str_time(endTime))));
+	}
+	else
+	{
+		if (*includeThis)
+			ereport(LOG,
+					(errmsg("recovery stopping after abort of transaction %u, time %s",
+							record->xl_xid, str_time(endTime))));
+		else
+			ereport(LOG,
+					(errmsg("recovery stopping before abort of transaction %u, time %s",
+							record->xl_xid, str_time(endTime))));
+	}
+
+	return true;
+}
+
/*
* This must be called ONCE during postmaster or standalone-backend startup
*/
LastRec,
checkPointLoc,
EndOfLog;
+ uint32 endLogId;
+ uint32 endLogSeg;
XLogRecord *record;
char *buffer;
uint32 freespace;
pg_usleep(60000000L);
#endif
+ /*
+ * Check for recovery control file, and if so set up state for
+ * offline recovery
+ */
+ readRecoveryCommandFile();
+
/*
* Get the last valid checkpoint record. If the latest one according
* to pg_control is broken, try the next-to-last one.
if (record != NULL)
{
+ bool recoveryContinue = true;
+ bool recoveryApply = true;
+
InRedo = true;
ereport(LOG,
(errmsg("redo starts at %X/%X",
ReadRecPtr.xlogid, ReadRecPtr.xrecoff)));
+
+ /*
+ * main redo apply loop
+ */
do
{
- /* nextXid must be beyond record's xid */
- if (TransactionIdFollowsOrEquals(record->xl_xid,
- ShmemVariableCache->nextXid))
- {
- ShmemVariableCache->nextXid = record->xl_xid;
- TransactionIdAdvance(ShmemVariableCache->nextXid);
- }
-
#ifdef WAL_DEBUG
if (XLOG_DEBUG)
{
}
#endif
+ /*
+ * Have we reached our recovery target?
+ */
+ if (recoveryStopsHere(record, &recoveryApply))
+ {
+ recoveryContinue = false;
+ if (!recoveryApply)
+ break;
+ }
+
+ /* nextXid must be beyond record's xid */
+ if (TransactionIdFollowsOrEquals(record->xl_xid,
+ ShmemVariableCache->nextXid))
+ {
+ ShmemVariableCache->nextXid = record->xl_xid;
+ TransactionIdAdvance(ShmemVariableCache->nextXid);
+ }
+
if (record->xl_info & XLR_BKP_BLOCK_MASK)
RestoreBkpBlocks(record, EndRecPtr);
RmgrTable[record->xl_rmid].rm_redo(EndRecPtr, record);
+
+ LastRec = ReadRecPtr;
+
record = ReadRecord(NULL, LOG, buffer);
- } while (record != NULL);
+ } while (record != NULL && recoveryContinue);
+ /*
+ * end of main redo apply loop
+ */
+
ereport(LOG,
(errmsg("redo done at %X/%X",
ReadRecPtr.xlogid, ReadRecPtr.xrecoff)));
- LastRec = ReadRecPtr;
InRedo = false;
}
else
}
/*
- * Init xlog buffer cache using the block containing the last valid
- * record from the previous incarnation.
+ * Re-fetch the last valid or last applied record, so we can identify
+ * the exact endpoint of what we consider the valid portion of WAL.
*/
record = ReadRecord(&LastRec, PANIC, buffer);
EndOfLog = EndRecPtr;
- XLByteToPrevSeg(EndOfLog, openLogId, openLogSeg);
+ XLByteToPrevSeg(EndOfLog, endLogId, endLogSeg);
+
+ /*
+ * We are now done reading the old WAL. Turn off archive fetching
+ * if it was active, and make a writable copy of the last WAL segment.
+ * (Note that we also have a copy of the last block of the old WAL in
+ * readBuf; we will use that below.)
+ */
+ if (InArchiveRecovery)
+ exitArchiveRecovery(endLogId, endLogSeg, EndOfLog.xrecoff);
+
+ /*
+ * Prepare to write WAL starting at EndOfLog position, and init xlog
+ * buffer cache using the block containing the last record from the
+ * previous incarnation.
+ */
+ openLogId = endLogId;
+ openLogSeg = endLogSeg;
openLogFile = XLogFileOpen(openLogId, openLogSeg, false);
openLogOff = 0;
ControlFile->logId = openLogId;
}
#ifdef WAL_DEBUG
+
static void
xlog_outrec(char *buf, XLogRecord *record)
{
sprintf(buf + strlen(buf), ": %s",
RmgrTable[record->xl_rmid].rm_name);
}
+
#endif /* WAL_DEBUG */
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * pgarch.c
+ *
+ * PostgreSQL WAL archiver
+ *
+ * All functions relating to archiver are included here
+ *
+ * - All functions executed by archiver process
+ *
+ * - archiver is forked from postmaster, and the two
+ * processes then communicate using signals. All functions
+ * executed by postmaster are included in this file.
+ *
+ * Initial author: Simon Riggs simon@2ndquadrant.com
+ *
+ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/postmaster/pgarch.c,v 1.1 2004/07/19 02:47:08 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <fcntl.h>
+#include <signal.h>
+#include <time.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include "postmaster/pgarch.h"
+#include "libpq/pqsignal.h"
+#include "miscadmin.h"
+#include "postmaster/postmaster.h"
+#include "storage/fd.h"
+#include "storage/ipc.h"
+#include "storage/pg_shmem.h"
+#include "storage/pmsignal.h"
+#include "utils/guc.h"
+#include "utils/ps_status.h"
+
+
+/* ----------
+ * Timer definitions.
+ * ----------
+ */
+#define PGARCH_AUTOWAKE_INTERVAL 60 /* How often to force a poll of
+ * the archive status directory;
+ * in seconds. */
+#define PGARCH_RESTART_INTERVAL 60 /* Minimum time between attempts to
+ * launch an archiver, enforced in
+ * pgarch_start(); in seconds. */
+
+/* ----------
+ * Archiver control info.
+ *
+ * We expect that archivable files within pg_xlog will have names between
+ * MIN_XFN_CHARS and MAX_XFN_CHARS in length, consisting only of characters
+ * appearing in VALID_XFN_CHARS. The status files in archive_status have
+ * corresponding names with ".ready" or ".done" appended.
+ *
+ * pgarch_readyXlog() uses these three macros to decide which entries of
+ * the status directory are candidates for archiving.
+ * ----------
+ */
+#define MIN_XFN_CHARS 16
+#define MAX_XFN_CHARS 16
+#define VALID_XFN_CHARS "0123456789ABCDEF"
+
+#define NUM_ARCHIVE_RETRIES 3 /* failed attempts on one file before
+ * pgarch_ArchiverCopyLoop() gives up
+ * for the current pass */
+
+
+/* ----------
+ * Local data
+ * ----------
+ */
+static char XLogDir[MAXPGPATH]; /* "<DataDir>/pg_xlog", set in PgArchiverMain */
+static char XLogArchiveStatusDir[MAXPGPATH]; /* "<XLogDir>/archive_status", set in PgArchiverMain */
+static time_t last_pgarch_start_time; /* time of the latest launch attempt in pgarch_start */
+
+/*
+ * Flags set by interrupt handlers for later service in the main loop.
+ *
+ * got_SIGHUP is set by ArchSigHupHandler. wakened is set by pgarch_waken,
+ * and also by pgarch_MainLoop itself on entry and when its periodic
+ * timeout expires.
+ */
+static volatile sig_atomic_t got_SIGHUP = false;
+static volatile sig_atomic_t wakened = false;
+
+/* ----------
+ * Local function forward declarations
+ *
+ * pgarch_forkexec exists only in EXEC_BACKEND builds. PgArchiverMain is
+ * declared NON_EXEC_STATIC rather than plain static.
+ * ----------
+ */
+#ifdef EXEC_BACKEND
+static pid_t pgarch_forkexec(void);
+#endif
+
+NON_EXEC_STATIC void PgArchiverMain(int argc, char *argv[]);
+static void pgarch_exit(SIGNAL_ARGS); /* SIGQUIT handler */
+static void ArchSigHupHandler(SIGNAL_ARGS); /* SIGHUP handler */
+static void pgarch_waken(SIGNAL_ARGS); /* SIGUSR1 handler */
+static void pgarch_MainLoop(void);
+static void pgarch_ArchiverCopyLoop(void);
+static bool pgarch_archiveXlog(char *xlog);
+static bool pgarch_readyXlog(char *xlog);
+static void pgarch_archiveDone(char *xlog);
+
+
+/* ------------------------------------------------------------
+ * Public functions called from postmaster follow
+ * ------------------------------------------------------------
+ */
+
+/*
+ * pgarch_start
+ *
+ * Postmaster-side entry point: try to launch a fresh archiver process.
+ * Called at postmaster startup and again after an existing archiver died.
+ *
+ * Returns the PID of the new child, or 0 if nothing was started.  Zero is
+ * returned when archiving is not active, when the previous launch attempt
+ * was less than PGARCH_RESTART_INTERVAL seconds ago, or when the fork
+ * failed (which is also logged).
+ *
+ * Note: a zero result is not final, since the postmaster main loop will
+ * call us again.
+ */
+int
+pgarch_start(void)
+{
+    time_t      now;
+    pid_t       child;
+
+    /* No archiver is wanted unless archiving is active. */
+    if (!XLogArchivingActive())
+        return 0;
+
+    /*
+     * Safety valve: refuse to start another archiver too soon after the
+     * previous attempt, so that an archiver dying immediately at launch
+     * does not cause continuous respawning.  We get another chance on a
+     * later call from the postmaster main loop.
+     */
+    now = time(NULL);
+    if ((unsigned int) (now - last_pgarch_start_time) <
+        (unsigned int) PGARCH_RESTART_INTERVAL)
+        return 0;
+    last_pgarch_start_time = now;
+
+    /* Flush stdio before creating the child. */
+    fflush(stdout);
+    fflush(stderr);
+
+#ifdef __BEOS__
+    /* Specific beos actions before backend startup */
+    beos_before_backend_startup();
+#endif
+
+#ifdef EXEC_BACKEND
+    child = pgarch_forkexec();
+#else
+    child = fork();
+#endif
+
+    if (child == -1)
+    {
+        /* Could not create the child; report it and let the caller retry. */
+#ifdef __BEOS__
+        /* Specific beos actions */
+        beos_backend_startup_failed();
+#endif
+        ereport(LOG,
+                (errmsg("could not fork archiver: %m")));
+        return 0;
+    }
+
+#ifndef EXEC_BACKEND
+    if (child == 0)
+    {
+        /* in postmaster child ... */
+#ifdef __BEOS__
+        /* Specific beos actions after backend startup */
+        beos_backend_startup();
+#endif
+        /* Close the postmaster's sockets */
+        ClosePostmasterPorts();
+
+        /* Drop our connection to postmaster's shared memory, as well */
+        PGSharedMemoryDetach();
+
+        PgArchiverMain(0, NULL);
+
+        /* shouldn't get here */
+        return 0;
+    }
+#endif
+
+    /* Still in the postmaster: hand back the child's PID. */
+    return (int) child;
+}
+
+/* ------------------------------------------------------------
+ * Local functions called by archiver follow
+ * ------------------------------------------------------------
+ */
+
+
+#ifdef EXEC_BACKEND
+
+/*
+ * pgarch_forkexec() -
+ *
+ * Build the argument list for the archiver process and pass it to
+ * postmaster_forkexec(), returning that function's result.
+ */
+static pid_t
+pgarch_forkexec(void)
+{
+    char       *av[10];
+    int         ac;
+
+    av[0] = "postgres";
+    av[1] = "-forkarch";
+    av[2] = NULL;               /* filled in by postmaster_forkexec */
+    ac = 3;
+
+    /* terminate the vector; it must still fit in av[] */
+    av[ac] = NULL;
+    Assert(ac < lengthof(av));
+
+    return postmaster_forkexec(ac, av);
+}
+
+#endif /* EXEC_BACKEND */
+
+
+/*
+ * PgArchiverMain
+ *
+ * Entry point of the archiver process. Marks the process as a postmaster
+ * child, installs the archiver's signal dispositions, sets the ps title,
+ * computes the pg_xlog and archive_status paths under DataDir, then runs
+ * pgarch_MainLoop() and exits with status 0 once that returns.
+ *
+ * The argc/argv parameters are valid only in EXEC_BACKEND case. However,
+ * since we don't use 'em, it hardly matters...
+ */
+NON_EXEC_STATIC void
+PgArchiverMain(int argc, char *argv[])
+{
+ IsUnderPostmaster = true; /* we are a postmaster subprocess now */
+
+ MyProcPid = getpid(); /* reset MyProcPid */
+
+ /* Lose the postmaster's on-exit routines */
+ on_exit_reset();
+
+ /*
+ * Set up signal handling. SIGHUP, SIGUSR1 and SIGQUIT get archiver
+ * handlers; SIGINT, SIGTERM, SIGALRM, SIGPIPE and SIGUSR2 are ignored;
+ * the remaining signals listed here get their default action.
+ */
+ pqsignal(SIGHUP, ArchSigHupHandler); /* request config reload */
+ pqsignal(SIGINT, SIG_IGN);
+ pqsignal(SIGTERM, SIG_IGN);
+ pqsignal(SIGQUIT, pgarch_exit); /* exit immediately */
+ pqsignal(SIGALRM, SIG_IGN);
+ pqsignal(SIGPIPE, SIG_IGN);
+ pqsignal(SIGUSR1, pgarch_waken); /* request a pass of the copy loop */
+ pqsignal(SIGUSR2, SIG_IGN);
+ pqsignal(SIGCHLD, SIG_DFL);
+ pqsignal(SIGTTIN, SIG_DFL);
+ pqsignal(SIGTTOU, SIG_DFL);
+ pqsignal(SIGCONT, SIG_DFL);
+ pqsignal(SIGWINCH, SIG_DFL);
+ PG_SETMASK(&UnBlockSig);
+
+ /*
+ * Identify myself via ps
+ */
+ init_ps_display("archiver process", "", "");
+ set_ps_display("");
+
+ /* Init XLOG file paths */
+ snprintf(XLogDir, MAXPGPATH, "%s/pg_xlog", DataDir);
+ snprintf(XLogArchiveStatusDir, MAXPGPATH, "%s/archive_status", XLogDir);
+
+ pgarch_MainLoop(); /* returns when archiving is switched off or the postmaster is gone */
+
+ exit(0);
+}
+
+/*
+ * SIGQUIT signal handler for archiver process
+ *
+ * Terminates the archiver immediately with exit status 0.  Nothing is
+ * cleaned up and any archive copy in progress is not waited for.
+ */
+static void
+pgarch_exit(SIGNAL_ARGS)
+{
+    /*
+     * For now, we just nail the doors shut and get out of town.  It might
+     * seem cleaner to finish up any pending archive copies, but there's
+     * a nontrivial risk that init will kill us partway through.
+     *
+     * Use _exit() rather than exit(): exit() is not async-signal-safe,
+     * because it runs atexit handlers and flushes stdio, and we may have
+     * interrupted code that was in the middle of using either.  _exit()
+     * ends the process with the same status without touching that state.
+     */
+    _exit(0);
+}
+
+/*
+ * SIGHUP: set flag to re-read config file at next convenient time.
+ *
+ * The handler only records the request in got_SIGHUP. pgarch_MainLoop()
+ * notices the flag, clears it and calls ProcessConfigFile().
+ */
+static void
+ArchSigHupHandler(SIGNAL_ARGS)
+{
+ got_SIGHUP = true;
+}
+
+/*
+ * SIGUSR1 signal handler for archiver process.
+ *
+ * Sets the wakened flag, which makes pgarch_MainLoop() run
+ * pgarch_ArchiverCopyLoop() the next time it tests the flag.
+ */
+static void
+pgarch_waken(SIGNAL_ARGS)
+{
+ wakened = true;
+}
+
+/*
+ * pgarch_MainLoop
+ *
+ * Service loop of the archiver.  Each pass handles a pending config
+ * reload, runs the copy loop if a wakeup is pending, and otherwise
+ * sleeps.  Returns when a reload shows that archiving is no longer
+ * active, or when PostmasterIsAlive() reports false.
+ */
+static void
+pgarch_MainLoop(void)
+{
+    time_t      last_copy_time = 0;
+
+    /*
+     * Force one pass of the copy loop right away: there may be unarchived
+     * files left over from a previous database run, or from an archiver
+     * that died unexpectedly.  Later passes wait for a signal or timeout.
+     */
+    wakened = true;
+
+    for (;;)
+    {
+        /* Check for config update */
+        if (got_SIGHUP)
+        {
+            got_SIGHUP = false;
+            ProcessConfigFile(PGC_SIGHUP);
+            if (!XLogArchivingActive())
+                return;         /* user wants us to shut down */
+        }
+
+        /* Do what we're here for */
+        if (wakened)
+        {
+            wakened = false;
+            pgarch_ArchiverCopyLoop();
+            last_copy_time = time(NULL);
+        }
+
+        /*
+         * With nothing pending we could simply block until a signal
+         * arrives, as with pause(3).  We use a bounded sleep instead and
+         * schedule a copy pass ourselves once PGARCH_AUTOWAKE_INTERVAL
+         * seconds have gone by since the last one.  In particular this
+         * avoids getting stuck if a signal arrives just before we enter
+         * sleep().
+         */
+        if (!wakened)
+        {
+            time_t      now;
+
+            sleep(PGARCH_AUTOWAKE_INTERVAL);
+
+            now = time(NULL);
+            if ((unsigned int) (now - last_copy_time) >=
+                (unsigned int) PGARCH_AUTOWAKE_INTERVAL)
+                wakened = true;
+        }
+
+        /* Stop once the postmaster is gone. */
+        if (!PostmasterIsAlive(true))
+            return;
+    }
+}
+
+/*
+ * pgarch_ArchiverCopyLoop
+ *
+ * Archives every xlog file currently marked .ready, oldest first, then
+ * returns.  A file is attempted up to NUM_ARCHIVE_RETRIES times with a
+ * one-second pause between attempts; if it still fails, a WARNING is
+ * issued and the whole pass is abandoned.
+ */
+static void
+pgarch_ArchiverCopyLoop(void)
+{
+    char        xlog[MAX_XFN_CHARS + 1];
+
+    /*
+     * Usually there is a single file to handle, but backends may mark
+     * further files as ready while we are busy copying earlier ones, so
+     * keep asking until nothing is left.
+     */
+    while (pgarch_readyXlog(xlog))
+    {
+        int         attempts_left = NUM_ARCHIVE_RETRIES;
+
+        /* retry until the archive command succeeds or we run out of tries */
+        while (!pgarch_archiveXlog(xlog))
+        {
+            if (--attempts_left <= 0)
+            {
+                ereport(WARNING,
+                        (errmsg("transaction log file \"%s\" could not be archived",
+                                xlog)));
+                return;         /* give up archiving for now */
+            }
+            sleep(1);           /* wait a bit before retrying */
+        }
+
+        /* successful */
+        pgarch_archiveDone(xlog);
+    }
+}
+
+/*
+ * pgarch_archiveXlog
+ *
+ * Invokes system(3) to copy one archive file to wherever it should go.
+ *
+ * The command is built from XLogArchiveCommand with these substitutions:
+ *     %p  full path of the source file (XLogDir "/" xlog)
+ *     %f  file name of the source file (xlog)
+ *     %%  a single %
+ * A % followed by anything else is copied unchanged.
+ *
+ * Returns true if system() returned zero.  Returns false, after logging,
+ * if the command returned nonzero or if the expanded command does not fit
+ * into MAXPGPATH bytes.  A truncated command is never executed: it could
+ * appear to succeed while copying to the wrong place, and the file would
+ * then be marked as archived.
+ */
+static bool
+pgarch_archiveXlog(char *xlog)
+{
+    char        xlogarchcmd[MAXPGPATH];
+    char        pathname[MAXPGPATH];
+    char       *dp;
+    char       *endp;
+    const char *sp;
+    bool        overflow = false;
+    int         rc;
+
+    snprintf(pathname, MAXPGPATH, "%s/%s", XLogDir, xlog);
+
+    /*
+     * construct the command to be executed
+     */
+    dp = xlogarchcmd;
+    endp = xlogarchcmd + MAXPGPATH - 1;     /* last byte, kept for the '\0' */
+
+    for (sp = XLogArchiveCommand; *sp; sp++)
+    {
+        const char *subst = NULL;   /* replacement text for %p or %f */
+
+        if (*sp == '%')
+        {
+            switch (sp[1])
+            {
+                case 'p':
+                    /* %p: full path of source file */
+                    sp++;
+                    subst = pathname;
+                    break;
+                case 'f':
+                    /* %f: filename of source file */
+                    sp++;
+                    subst = xlog;
+                    break;
+                case '%':
+                    /* convert %% to a single %: skip one, copy the other */
+                    sp++;
+                    break;
+                default:
+                    /* otherwise treat the % as not special */
+                    break;
+            }
+        }
+
+        if (subst != NULL)
+        {
+            size_t      sublen = strlen(subst);
+
+            if (sublen > (size_t) (endp - dp))
+            {
+                overflow = true;
+                break;
+            }
+            memcpy(dp, subst, sublen);
+            dp += sublen;
+        }
+        else
+        {
+            if (dp >= endp)
+            {
+                overflow = true;
+                break;
+            }
+            *dp++ = *sp;
+        }
+    }
+    *dp = '\0';
+
+    if (overflow)
+    {
+        ereport(LOG,
+                (errmsg("archive command for transaction log file \"%s\" is too long",
+                        xlog)));
+        return false;
+    }
+
+    ereport(DEBUG3,
+            (errmsg_internal("executing archive command \"%s\"",
+                             xlogarchcmd)));
+    rc = system(xlogarchcmd);
+    if (rc != 0)
+    {
+        /* rc is the unmodified result of system() */
+        ereport(LOG,
+                (errmsg("archive command \"%s\" failed: return code %d",
+                        xlogarchcmd, rc)));
+        return false;
+    }
+    ereport(LOG,
+            (errmsg("archived transaction log file \"%s\"", xlog)));
+
+    return true;
+}
+
+/*
+ * pgarch_readyXlog
+ *
+ * Find the oldest xlog file that has not yet been archived.  On success
+ * its name (without the ".ready" suffix) is copied into xlog and true is
+ * returned; if no file is waiting, returns false and leaves xlog alone.
+ *
+ * Nothing is recorded to say that archiving of the file is now in
+ * progress, so this would need extending if several archival tasks ran
+ * concurrently.  If a failure occurs, the file is simply found again and
+ * completely re-copied at the next opportunity.
+ *
+ * Returning the oldest file matters for two reasons:
+ * 1) it maintains the sequential chain of xlogs required for recovery
+ * 2) the oldest files are the first to become candidates for recycling
+ *    at time of checkpoint
+ */
+static bool
+pgarch_readyXlog(char *xlog)
+{
+    /*
+     * Scan the whole status directory for names of the form
+     * <xlog name>.ready and keep the one that sorts lowest.  This could
+     * be optimised, but on the vast majority of calls only a single file
+     * is expected.
+     */
+    char        oldest[MAX_XFN_CHARS + 6 + 1];
+    DIR        *dir;
+    struct dirent *entry;
+    bool        found = false;
+
+    dir = AllocateDir(XLogArchiveStatusDir);
+    if (dir == NULL)
+        ereport(ERROR,
+                (errcode_for_file_access(),
+                 errmsg("could not open archive status directory \"%s\": %m",
+                        XLogArchiveStatusDir)));
+
+    /* errno is cleared before every readdir() so a failure is detectable */
+    for (errno = 0; (entry = readdir(dir)) != NULL; errno = 0)
+    {
+        const char *fname = entry->d_name;
+        int         basenamelen = (int) strlen(fname) - 6;
+
+        /* the part before the 6-character suffix must have a valid length */
+        if (basenamelen < MIN_XFN_CHARS || basenamelen > MAX_XFN_CHARS)
+            continue;
+        /* ... consist only of valid characters ... */
+        if (strspn(fname, VALID_XFN_CHARS) < basenamelen)
+            continue;
+        /* ... and be followed by exactly ".ready" */
+        if (strcmp(fname + basenamelen, ".ready") != 0)
+            continue;
+
+        /* remember this name if it is the first or sorts before the best */
+        if (!found || strcmp(fname, oldest) < 0)
+        {
+            strcpy(oldest, fname);
+            found = true;
+        }
+    }
+#ifdef WIN32
+    /* This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but
+       not in released version */
+    if (GetLastError() == ERROR_NO_MORE_FILES)
+        errno = 0;
+#endif
+    if (errno)
+        ereport(ERROR,
+                (errcode_for_file_access(),
+                 errmsg("could not read archive status directory \"%s\": %m",
+                        XLogArchiveStatusDir)));
+    FreeDir(dir);
+
+    if (!found)
+        return false;
+
+    /* truncate off the .ready */
+    oldest[strlen(oldest) - 6] = '\0';
+    strcpy(xlog, oldest);
+    return true;
+}
+
+/*
+ * pgarch_archiveDone
+ *
+ * Record that an xlog file has been successfully archived by renaming
+ * its status file from NNN.ready to NNN.done.  Eventually, a checkpoint
+ * process will notice this and delete both the NNN.done file and the
+ * xlog file itself.
+ *
+ * A failed rename is reported as a WARNING and nothing else is done.
+ */
+static void
+pgarch_archiveDone(char *xlog)
+{
+    char        readyPath[MAXPGPATH];
+    char        donePath[MAXPGPATH];
+
+    snprintf(readyPath, MAXPGPATH, "%s/%s.ready", XLogArchiveStatusDir, xlog);
+    snprintf(donePath, MAXPGPATH, "%s/%s.done", XLogArchiveStatusDir, xlog);
+
+    if (rename(readyPath, donePath) < 0)
+        ereport(WARNING,
+                (errcode_for_file_access(),
+                 errmsg("could not rename \"%s\": %m",
+                        readyPath)));
+}