1 /*-------------------------------------------------------------------------
4 * Functions for archiving WAL files and restoring from the archive.
7 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
10 * src/backend/access/transam/xlogarchive.c
12 *-------------------------------------------------------------------------
17 #include <sys/types.h>
23 #include "access/xlog.h"
24 #include "access/xlog_internal.h"
25 #include "miscadmin.h"
26 #include "postmaster/startup.h"
27 #include "replication/walsender.h"
28 #include "storage/fd.h"
29 #include "storage/ipc.h"
30 #include "storage/lwlock.h"
31 #include "storage/pmsignal.h"
34 * Attempt to retrieve the specified file from off-line archival storage.
35 * If successful, fill "path" with its complete path (note that this will be
36 * a temp file name that doesn't follow the normal naming convention), and
39 * If not successful, fill "path" with the name of the normal on-line file
40 * (which may or may not actually exist, but we'll try to use it), and return
43 * For fixed-size files, the caller may pass the expected size as an
44 * additional crosscheck on successful recovery. If the file size is not
45 * known, set expectedSize = 0.
47 * When 'cleanupEnabled' is false, refrain from deleting any old WAL segments
48 * in the archive. This is used when fetching the initial checkpoint record,
49 * when we are not yet sure how far back we need the WAL.
52 RestoreArchivedFile(char *path, const char *xlogfname,
53 const char *recovername, off_t expectedSize,
56 char xlogpath[MAXPGPATH];
57 char xlogRestoreCmd[MAXPGPATH];
58 char lastRestartPointFname[MAXPGPATH];
65 XLogSegNo restartSegNo;
66 XLogRecPtr restartRedoPtr;
67 TimeLineID restartTli;
69 /* In standby mode, restore_command might not be supplied */
70 if (recoveryRestoreCommand == NULL)
74 * When doing archive recovery, we always prefer an archived log file even
75 * if a file of the same name exists in XLOGDIR. The reason is that the
76 * file in XLOGDIR could be an old, un-filled or partly-filled version
77 * that was copied and restored as part of backing up $PGDATA.
79 * We could try to optimize this slightly by checking the local copy
80 * lastchange timestamp against the archived copy, but we have no API to
81 * do this, nor can we guarantee that the lastchange timestamp was
82 * preserved correctly when we copied to archive. Our aim is robustness,
83 * so we elect not to do this.
85 * If we cannot obtain the log file from the archive, however, we will try
86 * to use the XLOGDIR file if it exists. This is so that we can make use
87 * of log segments that weren't yet transferred to the archive.
89 * Notice that we don't actually overwrite any files when we copy back
90 * from archive because the restore_command may inadvertently restore
91 * inappropriate xlogs, or they may be corrupt, so we may wish to fall back
92 * to the segments remaining in current XLOGDIR later. The
93 * copy-from-archive filename is always the same, ensuring that we don't
94 * run out of disk space on long recoveries.
96 snprintf(xlogpath, MAXPGPATH, XLOGDIR "/%s", recovername);
99 * Make sure there is no existing file named recovername.
101 if (stat(xlogpath, &stat_buf) != 0)
105 (errcode_for_file_access(),
106 errmsg("could not stat file \"%s\": %m",
111 if (unlink(xlogpath) != 0)
113 (errcode_for_file_access(),
114 errmsg("could not remove file \"%s\": %m",
119 * Calculate the archive file cutoff point for use during log shipping
120 * replication. All files earlier than this point can be deleted from the
121 * archive, though there is no requirement to do so.
123 * If cleanup is not enabled, initialise this with the filename of
124 * InvalidXLogRecPtr, which will prevent the deletion of any WAL files
125 * from the archive because of the alphabetic sorting property of WAL
128 * Once we have successfully located the redo pointer of the checkpoint
129 * from which we start recovery we never request a file prior to the redo
130 * pointer of the last restartpoint. When redo begins we know that we have
131 * successfully located it, so there is no need for additional status
132 * flags to signify the point when we can begin deleting WAL files from
137 GetOldestRestartPoint(&restartRedoPtr, &restartTli);
138 XLByteToSeg(restartRedoPtr, restartSegNo);
139 XLogFileName(lastRestartPointFname, restartTli, restartSegNo);
140 /* we shouldn't need anything earlier than last restart point */
141 Assert(strcmp(lastRestartPointFname, xlogfname) <= 0);
144 XLogFileName(lastRestartPointFname, 0, 0L);
147 * construct the command to be executed
150 endp = xlogRestoreCmd + MAXPGPATH - 1;
153 for (sp = recoveryRestoreCommand; *sp; sp++)
160 /* %p: relative path of target file */
162 StrNCpy(dp, xlogpath, endp - dp);
163 make_native_path(dp);
167 /* %f: filename of desired file */
169 StrNCpy(dp, xlogfname, endp - dp);
173 /* %r: filename of last restartpoint */
175 StrNCpy(dp, lastRestartPointFname, endp - dp);
179 /* convert %% to a single % */
185 /* otherwise treat the % as not special */
200 (errmsg_internal("executing restore command \"%s\"",
204 * Check signals before restore command and reset afterwards.
209 * Copy xlog from archival storage to XLOGDIR
211 rc = system(xlogRestoreCmd);
213 PostRestoreCommand();
218 * command apparently succeeded, but let's make sure the file is
219 * really there now and has the correct size.
221 if (stat(xlogpath, &stat_buf) == 0)
223 if (expectedSize > 0 && stat_buf.st_size != expectedSize)
228 * If we find a partial file in standby mode, we assume it's
229 * because it's just being copied to the archive, and keep
232 * Otherwise treat a wrong-sized file as FATAL to ensure the
233 * DBA would notice it, but is that too strong? We could try
234 * to plow ahead with a local copy of the file ... but the
235 * problem is that there probably isn't one, and we'd
236 * incorrectly conclude we've reached the end of WAL and we're
237 * done recovering ...
239 if (StandbyMode && stat_buf.st_size < expectedSize)
244 (errmsg("archive file \"%s\" has wrong size: %lu instead of %lu",
246 (unsigned long) stat_buf.st_size,
247 (unsigned long) expectedSize)));
253 (errmsg("restored log file \"%s\" from archive",
255 strcpy(path, xlogpath);
264 (errcode_for_file_access(),
265 errmsg("could not stat file \"%s\": %m",
271 * Remember, we roll forward UNTIL the restore fails so failure here is
272 * just part of the process... that makes it difficult to determine
273 * whether the restore failed because there isn't an archive to restore,
274 * or because the administrator has specified the restore program
275 * incorrectly. We have to assume the former.
277 * However, if the failure was due to any sort of signal, it's best to
278 * punt and abort recovery. (If we "return false" here, upper levels will
279 * assume that recovery is complete and start up the database!) It's
280 * essential to abort on child SIGINT and SIGQUIT, because per spec
281 * system() ignores SIGINT and SIGQUIT while waiting; if we see one of
282 * those it's a good bet we should have gotten it too.
284 * On SIGTERM, assume we have received a fast shutdown request, and exit
285 * cleanly. It's pure chance whether we receive the SIGTERM first, or the
286 * child process. If we receive it first, the signal handler will call
287 * proc_exit, otherwise we do it here. If we or the child process received
288 * SIGTERM for any other reason than a fast shutdown request, postmaster
289 * will perform an immediate shutdown when it sees us exiting
292 * Per the Single Unix Spec, shells report exit status > 128 when a called
293 * command died on a signal. Also, 126 and 127 are used to report
294 * problems such as an unfindable command; treat those as fatal errors
297 if (WIFSIGNALED(rc) && WTERMSIG(rc) == SIGTERM)
300 signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
302 ereport(signaled ? FATAL : DEBUG2,
303 (errmsg("could not restore file \"%s\" from archive: %s",
304 xlogfname, wait_result_to_str(rc))));
309 * if an archived file is not available, there might still be a version of
310 * this file in XLOGDIR, so return that as the filename to open.
312 * In many recovery scenarios we expect this to fail also, but if so that
313 * just means we've reached the end of WAL.
315 snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname);
320 * Attempt to execute an external shell command during recovery.
322 * 'command' is the shell command to be executed, 'commandName' is a
323 * human-readable name describing the command emitted in the logs. If
324 * 'failOnSignal' is true and the command is killed by a signal, a FATAL
325 * error is thrown. Otherwise a WARNING is emitted.
327 * This is currently used for recovery_end_command and archive_cleanup_command.
330 ExecuteRecoveryCommand(char *command, char *commandName, bool failOnSignal)
332 char xlogRecoveryCmd[MAXPGPATH];
333 char lastRestartPointFname[MAXPGPATH];
339 XLogSegNo restartSegNo;
340 XLogRecPtr restartRedoPtr;
341 TimeLineID restartTli;
343 Assert(command && commandName);
346 * Calculate the archive file cutoff point for use during log shipping
347 * replication. All files earlier than this point can be deleted from the
348 * archive, though there is no requirement to do so.
350 GetOldestRestartPoint(&restartRedoPtr, &restartTli);
351 XLByteToSeg(restartRedoPtr, restartSegNo);
352 XLogFileName(lastRestartPointFname, restartTli, restartSegNo);
355 * construct the command to be executed
357 dp = xlogRecoveryCmd;
358 endp = xlogRecoveryCmd + MAXPGPATH - 1;
361 for (sp = command; *sp; sp++)
368 /* %r: filename of last restartpoint */
370 StrNCpy(dp, lastRestartPointFname, endp - dp);
374 /* convert %% to a single % */
380 /* otherwise treat the % as not special */
395 (errmsg_internal("executing %s \"%s\"", commandName, command)));
398 * execute the constructed command
400 rc = system(xlogRecoveryCmd);
404 * If the failure was due to any sort of signal, it's best to punt and
405 * abort recovery. See also detailed comments on signals in
406 * RestoreArchivedFile().
408 signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
410 ereport((signaled && failOnSignal) ? FATAL : WARNING,
412 translator: First %s represents a recovery.conf parameter name like
413 "recovery_end_command", the 2nd is the value of that parameter, the
414 third an already translated error message. */
415 (errmsg("%s \"%s\": %s", commandName,
416 command, wait_result_to_str(rc))));
422 * A file was restored from the archive under a temporary filename (path),
423 * and now we want to keep it. Rename it under the permanent filename in
424 * pg_xlog (xlogfname), replacing any existing file with the same name.
427 KeepFileRestoredFromArchive(char *path, char *xlogfname)
429 char xlogfpath[MAXPGPATH];
433 snprintf(xlogfpath, MAXPGPATH, XLOGDIR "/%s", xlogfname);
435 if (stat(xlogfpath, &statbuf) == 0)
437 char oldpath[MAXPGPATH];
440 static unsigned int deletedcounter = 1;
443 * On Windows, if another process (e.g. a walsender process) holds the
444 * file open in FILE_SHARE_DELETE mode, unlink will succeed, but the
445 * file will still show up in directory listing until the last handle
446 * is closed, and we cannot rename the new file in its place until
447 * that. To avoid that problem, rename the old file to a temporary
448 * name first. Use a counter to create a unique filename, because the
449 * same file might be restored from the archive multiple times, and a
450 * walsender could still be holding onto an old deleted version of it.
452 snprintf(oldpath, MAXPGPATH, "%s.deleted%u",
453 xlogfpath, deletedcounter++);
454 if (rename(xlogfpath, oldpath) != 0)
457 (errcode_for_file_access(),
458 errmsg("could not rename file \"%s\" to \"%s\": %m",
459 xlogfpath, oldpath)));
462 /* same-size buffers, so this never truncates */
463 strlcpy(oldpath, xlogfpath, MAXPGPATH);
465 if (unlink(oldpath) != 0)
467 (errcode_for_file_access(),
468 errmsg("could not remove file \"%s\": %m",
473 durable_rename(path, xlogfpath, ERROR);
476 * Create .done file forcibly to prevent the restored segment from being
477 * archived again later.
479 if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS)
480 XLogArchiveForceDone(xlogfname);
482 XLogArchiveNotify(xlogfname);
485 * If the existing file was replaced, since walsenders might have it open,
486 * request them to reload a currently-open segment. This is only required
487 * for WAL segments, walsenders don't hold other files open, but there's
488 * no harm in doing this too often, and we don't know what kind of a file
489 * we're dealing with here.
492 WalSndRqstFileReload();
495 * Signal walsender that new WAL has arrived. Again, this isn't necessary
496 * if we restored something other than a WAL segment, but it does no harm
505 * Create an archive notification file
507 * The name of the notification file is the message that will be picked up
508 * by the archiver, e.g. we write 0000000100000001000000C6.ready
509 * and the archiver then knows to archive XLOGDIR/0000000100000001000000C6,
510 * then when complete, rename it to 0000000100000001000000C6.done
513 XLogArchiveNotify(const char *xlog)
515 char archiveStatusPath[MAXPGPATH];
518 /* insert an otherwise empty file called <XLOG>.ready */
519 StatusFilePath(archiveStatusPath, xlog, ".ready");
520 fd = AllocateFile(archiveStatusPath, "w");
524 (errcode_for_file_access(),
525 errmsg("could not create archive status file \"%s\": %m",
526 archiveStatusPath)));
532 (errcode_for_file_access(),
533 errmsg("could not write archive status file \"%s\": %m",
534 archiveStatusPath)));
538 /* Notify archiver that it's got something to do */
539 if (IsUnderPostmaster)
540 SendPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER);
544 * Convenience routine to notify using segment number representation of filename
547 XLogArchiveNotifySeg(XLogSegNo segno)
549 char xlog[MAXFNAMELEN];
551 XLogFileName(xlog, ThisTimeLineID, segno);
552 XLogArchiveNotify(xlog);
556 * XLogArchiveForceDone
558 * Emit notification forcibly that an XLOG segment file has been successfully
559 * archived, by creating <XLOG>.done regardless of whether <XLOG>.ready
563 XLogArchiveForceDone(const char *xlog)
565 char archiveReady[MAXPGPATH];
566 char archiveDone[MAXPGPATH];
567 struct stat stat_buf;
570 /* Exit if already known done */
571 StatusFilePath(archiveDone, xlog, ".done");
572 if (stat(archiveDone, &stat_buf) == 0)
575 /* If .ready exists, rename it to .done */
576 StatusFilePath(archiveReady, xlog, ".ready");
577 if (stat(archiveReady, &stat_buf) == 0)
579 (void) durable_rename(archiveReady, archiveDone, WARNING);
583 /* insert an otherwise empty file called <XLOG>.done */
584 fd = AllocateFile(archiveDone, "w");
588 (errcode_for_file_access(),
589 errmsg("could not create archive status file \"%s\": %m",
596 (errcode_for_file_access(),
597 errmsg("could not write archive status file \"%s\": %m",
604 * XLogArchiveCheckDone
606 * This is called when we are ready to delete or recycle an old XLOG segment
607 * file or backup history file. If it is okay to delete it then return true.
608 * If it is not time to delete it, make sure a .ready file exists, and return
611 * If <XLOG>.done exists, then return true; else if <XLOG>.ready exists,
612 * then return false; else create <XLOG>.ready and return false.
614 * The reason we do things this way is so that if the original attempt to
615 * create <XLOG>.ready fails, we'll retry during subsequent checkpoints.
618 XLogArchiveCheckDone(const char *xlog)
620 char archiveStatusPath[MAXPGPATH];
621 struct stat stat_buf;
623 /* Always deletable if archiving is off */
624 if (!XLogArchivingActive())
627 /* First check for .done --- this means archiver is done with it */
628 StatusFilePath(archiveStatusPath, xlog, ".done");
629 if (stat(archiveStatusPath, &stat_buf) == 0)
632 /* check for .ready --- this means archiver is still busy with it */
633 StatusFilePath(archiveStatusPath, xlog, ".ready");
634 if (stat(archiveStatusPath, &stat_buf) == 0)
637 /* Race condition --- maybe archiver just finished, so recheck */
638 StatusFilePath(archiveStatusPath, xlog, ".done");
639 if (stat(archiveStatusPath, &stat_buf) == 0)
642 /* Retry creation of the .ready file */
643 XLogArchiveNotify(xlog);
650 * Check to see if an XLOG segment file is still unarchived.
651 * This is almost but not quite the inverse of XLogArchiveCheckDone: in
652 * the first place we aren't chartered to recreate the .ready file, and
653 * in the second place we should consider that if the file is already gone
654 * then it's not busy. (This check is needed to handle the race condition
655 * that a checkpoint already deleted the no-longer-needed file.)
658 XLogArchiveIsBusy(const char *xlog)
660 char archiveStatusPath[MAXPGPATH];
661 struct stat stat_buf;
663 /* First check for .done --- this means archiver is done with it */
664 StatusFilePath(archiveStatusPath, xlog, ".done");
665 if (stat(archiveStatusPath, &stat_buf) == 0)
668 /* check for .ready --- this means archiver is still busy with it */
669 StatusFilePath(archiveStatusPath, xlog, ".ready");
670 if (stat(archiveStatusPath, &stat_buf) == 0)
673 /* Race condition --- maybe archiver just finished, so recheck */
674 StatusFilePath(archiveStatusPath, xlog, ".done");
675 if (stat(archiveStatusPath, &stat_buf) == 0)
679 * Check to see if the WAL file has been removed by checkpoint, which
680 * implies it has already been archived, and explains why we can't see a
681 * status file for it.
683 snprintf(archiveStatusPath, MAXPGPATH, XLOGDIR "/%s", xlog);
684 if (stat(archiveStatusPath, &stat_buf) != 0 &&
692 * XLogArchiveIsReadyOrDone
694 * Check to see if an XLOG segment file has a .ready or .done file.
695 * This is similar to XLogArchiveIsBusy(), but returns true if the file
696 * is already archived or is about to be archived.
698 * This is currently only used at recovery. During normal operation this
699 * would be racy: the file might get removed or marked with .ready as we're
700 * checking it, or immediately after we return.
703 XLogArchiveIsReadyOrDone(const char *xlog)
705 char archiveStatusPath[MAXPGPATH];
706 struct stat stat_buf;
708 /* First check for .done --- this means archiver is done with it */
709 StatusFilePath(archiveStatusPath, xlog, ".done");
710 if (stat(archiveStatusPath, &stat_buf) == 0)
713 /* check for .ready --- this means archiver is still busy with it */
714 StatusFilePath(archiveStatusPath, xlog, ".ready");
715 if (stat(archiveStatusPath, &stat_buf) == 0)
718 /* Race condition --- maybe archiver just finished, so recheck */
719 StatusFilePath(archiveStatusPath, xlog, ".done");
720 if (stat(archiveStatusPath, &stat_buf) == 0)
729 * Check to see if an XLOG segment file has an archive notification (.ready)
733 XLogArchiveIsReady(const char *xlog)
735 char archiveStatusPath[MAXPGPATH];
736 struct stat stat_buf;
738 StatusFilePath(archiveStatusPath, xlog, ".ready");
739 if (stat(archiveStatusPath, &stat_buf) == 0)
748 * Cleanup archive notification file(s) for a particular xlog segment
751 XLogArchiveCleanup(const char *xlog)
753 char archiveStatusPath[MAXPGPATH];
755 /* Remove the .done file */
756 StatusFilePath(archiveStatusPath, xlog, ".done");
757 unlink(archiveStatusPath);
758 /* should we complain about failure? */
760 /* Remove the .ready file if present --- normally it shouldn't be */
761 StatusFilePath(archiveStatusPath, xlog, ".ready");
762 unlink(archiveStatusPath);
763 /* should we complain about failure? */