1 /*-------------------------------------------------------------------------
4 * Functions for archiving WAL files and restoring from the archive.
7 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
10 * src/backend/access/transam/xlogarchive.c
12 *-------------------------------------------------------------------------
22 #include "access/xlog.h"
23 #include "access/xlog_internal.h"
24 #include "miscadmin.h"
25 #include "postmaster/startup.h"
26 #include "replication/walsender.h"
27 #include "storage/fd.h"
28 #include "storage/ipc.h"
29 #include "storage/lwlock.h"
30 #include "storage/pmsignal.h"
33 * Attempt to retrieve the specified file from off-line archival storage.
34 * If successful, fill "path" with its complete path (note that this will be
35 * a temp file name that doesn't follow the normal naming convention), and
38 * If not successful, fill "path" with the name of the normal on-line file
39 * (which may or may not actually exist, but we'll try to use it), and return
42 * For fixed-size files, the caller may pass the expected size as an
43 * additional crosscheck on successful recovery. If the file size is not
44 * known, set expectedSize = 0.
46 * When 'cleanupEnabled' is false, refrain from deleting any old WAL segments
47 * in the archive. This is used when fetching the initial checkpoint record,
48 * when we are not yet sure how far back we need the WAL.
51 RestoreArchivedFile(char *path, const char *xlogfname,
52 const char *recovername, off_t expectedSize,
55 char xlogpath[MAXPGPATH];
56 char xlogRestoreCmd[MAXPGPATH];
57 char lastRestartPointFname[MAXPGPATH];
63 XLogSegNo restartSegNo;
64 XLogRecPtr restartRedoPtr;
65 TimeLineID restartTli;
/*
 * Overview of the visible steps.
 *
 * NOTE(review): the lines shown for this function are not contiguous
 * (braces, return statements and some conditions are not visible), so this
 * overview covers only what the visible statements establish.
 *
 *   1. xlogpath is set to XLOGDIR/<recovername>.  The path is probed with
 *      stat() and an existing file is removed with unlink(); the two error
 *      messages that follow cover stat() and unlink() failures (the exact
 *      conditions under which they fire are not visible).
 *   2. lastRestartPointFname is derived from GetOldestRestartPoint() via
 *      XLByteToSeg() and XLogFileName(), or from timeline 0 / segment 0 on
 *      the alternative path that the comment describes for the case where
 *      cleanup is not enabled.
 *   3. xlogRestoreCmd is assembled by walking recoveryRestoreCommand and
 *      expanding %p (xlogpath, passed through make_native_path), %f
 *      (xlogfname), %r (lastRestartPointFname) and %%.  Copies are bounded
 *      by endp, which points at the last byte of the MAXPGPATH-sized buffer.
 *   4. The command is run with system(), followed by PostRestoreCommand().
 *   5. xlogpath is checked again with stat().  When expectedSize > 0 and the
 *      size differs, a file shorter than expected in StandbyMode is tested
 *      for separately; the wrong-size message casts both sizes to unsigned
 *      long for printing.  On the success path xlogpath is copied into path.
 *   6. rc is inspected for signals: SIGTERM is tested on its own, and
 *      wait_result_is_any_signal(rc, true) selects FATAL over DEBUG2 for the
 *      "could not restore file" report.
 *   7. As the fallback, path is set to XLOGDIR/<xlogfname>.
 */
67 /* In standby mode, restore_command might not be supplied */
68 if (recoveryRestoreCommand == NULL || strcmp(recoveryRestoreCommand, "") == 0)
72 * When doing archive recovery, we always prefer an archived log file even
73 * if a file of the same name exists in XLOGDIR. The reason is that the
74 * file in XLOGDIR could be an old, un-filled or partly-filled version
75 * that was copied and restored as part of backing up $PGDATA.
77 * We could try to optimize this slightly by checking the local copy
78 * lastchange timestamp against the archived copy, but we have no API to
79 * do this, nor can we guarantee that the lastchange timestamp was
80 * preserved correctly when we copied to archive. Our aim is robustness,
81 * so we elect not to do this.
83 * If we cannot obtain the log file from the archive, however, we will try
84 * to use the XLOGDIR file if it exists. This is so that we can make use
85 * of log segments that weren't yet transferred to the archive.
87 * Notice that we don't actually overwrite any files when we copy back
88 * from archive because the restore_command may inadvertently restore
89 * inappropriate xlogs, or they may be corrupt, so we may wish to fallback
90 * to the segments remaining in current XLOGDIR later. The
91 * copy-from-archive filename is always the same, ensuring that we don't
92 * run out of disk space on long recoveries.
94 snprintf(xlogpath, MAXPGPATH, XLOGDIR "/%s", recovername);
97 * Make sure there is no existing file named recovername.
99 if (stat(xlogpath, &stat_buf) != 0)
103 (errcode_for_file_access(),
104 errmsg("could not stat file \"%s\": %m",
109 if (unlink(xlogpath) != 0)
111 (errcode_for_file_access(),
112 errmsg("could not remove file \"%s\": %m",
117 * Calculate the archive file cutoff point for use during log shipping
118 * replication. All files earlier than this point can be deleted from the
119 * archive, though there is no requirement to do so.
121 * If cleanup is not enabled, initialise this with the filename of
122 * InvalidXLogRecPtr, which will prevent the deletion of any WAL files
123 * from the archive because of the alphabetic sorting property of WAL
126 * Once we have successfully located the redo pointer of the checkpoint
127 * from which we start recovery we never request a file prior to the redo
128 * pointer of the last restartpoint. When redo begins we know that we have
129 * successfully located it, so there is no need for additional status
130 * flags to signify the point when we can begin deleting WAL files from
135 GetOldestRestartPoint(&restartRedoPtr, &restartTli);
136 XLByteToSeg(restartRedoPtr, restartSegNo, wal_segment_size);
137 XLogFileName(lastRestartPointFname, restartTli, restartSegNo,
139 /* we shouldn't need anything earlier than last restart point */
140 Assert(strcmp(lastRestartPointFname, xlogfname) <= 0);
143 XLogFileName(lastRestartPointFname, 0, 0L, wal_segment_size);
146 * construct the command to be executed
149 endp = xlogRestoreCmd + MAXPGPATH - 1;
152 for (sp = recoveryRestoreCommand; *sp; sp++)
159 /* %p: relative path of target file */
161 StrNCpy(dp, xlogpath, endp - dp);
162 make_native_path(dp);
166 /* %f: filename of desired file */
168 StrNCpy(dp, xlogfname, endp - dp);
172 /* %r: filename of last restartpoint */
174 StrNCpy(dp, lastRestartPointFname, endp - dp);
178 /* convert %% to a single % */
184 /* otherwise treat the % as not special */
199 (errmsg_internal("executing restore command \"%s\"",
203 * Check signals before restore command and reset afterwards.
208 * Copy xlog from archival storage to XLOGDIR
210 rc = system(xlogRestoreCmd);
212 PostRestoreCommand();
217 * command apparently succeeded, but let's make sure the file is
218 * really there now and has the correct size.
220 if (stat(xlogpath, &stat_buf) == 0)
222 if (expectedSize > 0 && stat_buf.st_size != expectedSize)
227 * If we find a partial file in standby mode, we assume it's
228 * because it's just being copied to the archive, and keep
231 * Otherwise treat a wrong-sized file as FATAL to ensure the
232 * DBA would notice it, but is that too strong? We could try
233 * to plow ahead with a local copy of the file ... but the
234 * problem is that there probably isn't one, and we'd
235 * incorrectly conclude we've reached the end of WAL and we're
236 * done recovering ...
238 if (StandbyMode && stat_buf.st_size < expectedSize)
243 (errmsg("archive file \"%s\" has wrong size: %lu instead of %lu",
245 (unsigned long) stat_buf.st_size,
246 (unsigned long) expectedSize)));
252 (errmsg("restored log file \"%s\" from archive",
254 strcpy(path, xlogpath);
263 (errcode_for_file_access(),
264 errmsg("could not stat file \"%s\": %m",
270 * Remember, we rollforward UNTIL the restore fails so failure here is
271 * just part of the process... that makes it difficult to determine
272 * whether the restore failed because there isn't an archive to restore,
273 * or because the administrator has specified the restore program
274 * incorrectly. We have to assume the former.
276 * However, if the failure was due to any sort of signal, it's best to
277 * punt and abort recovery. (If we "return false" here, upper levels will
278 * assume that recovery is complete and start up the database!) It's
279 * essential to abort on child SIGINT and SIGQUIT, because per spec
280 * system() ignores SIGINT and SIGQUIT while waiting; if we see one of
281 * those it's a good bet we should have gotten it too.
283 * On SIGTERM, assume we have received a fast shutdown request, and exit
284 * cleanly. It's pure chance whether we receive the SIGTERM first, or the
285 * child process. If we receive it first, the signal handler will call
286 * proc_exit, otherwise we do it here. If we or the child process received
287 * SIGTERM for any other reason than a fast shutdown request, postmaster
288 * will perform an immediate shutdown when it sees us exiting
291 * We treat hard shell errors such as "command not found" as fatal, too.
293 if (wait_result_is_signal(rc, SIGTERM))
296 ereport(wait_result_is_any_signal(rc, true) ? FATAL : DEBUG2,
297 (errmsg("could not restore file \"%s\" from archive: %s",
298 xlogfname, wait_result_to_str(rc))));
303 * if an archived file is not available, there might still be a version of
304 * this file in XLOGDIR, so return that as the filename to open.
306 * In many recovery scenarios we expect this to fail also, but if so that
307 * just means we've reached the end of WAL.
309 snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname);
314 * Attempt to execute an external shell command during recovery.
316 * 'command' is the shell command to be executed, 'commandName' is a
317 * human-readable name describing the command emitted in the logs. If
318 * 'failOnSignal' is true and the command is killed by a signal, a FATAL
319 * error is thrown. Otherwise a WARNING is emitted.
321 * This is currently used for recovery_end_command and archive_cleanup_command.
324 ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOnSignal)
326 char xlogRecoveryCmd[MAXPGPATH];
327 char lastRestartPointFname[MAXPGPATH];
332 XLogSegNo restartSegNo;
333 XLogRecPtr restartRedoPtr;
334 TimeLineID restartTli;
/*
 * Overview of the visible steps.
 *
 * NOTE(review): the lines shown for this function are not contiguous, so
 * this overview covers only what the visible statements establish.
 *
 * Both command and commandName must be non-NULL (asserted).  The function
 * computes lastRestartPointFname from GetOldestRestartPoint(), expands %r
 * and %% from command into xlogRecoveryCmd (bounded by endp, the last byte
 * of the MAXPGPATH-sized buffer), emits an "executing ..." message that
 * carries commandName and the unexpanded command text, and runs the
 * expanded command with system().  The report after the call is FATAL only
 * when failOnSignal is set and wait_result_is_any_signal(rc, true) holds;
 * otherwise it is a WARNING.  The condition guarding that report is not
 * visible here; presumably it is a nonzero rc -- confirm against the full
 * file.
 */
336 Assert(command && commandName);
339 * Calculate the archive file cutoff point for use during log shipping
340 * replication. All files earlier than this point can be deleted from the
341 * archive, though there is no requirement to do so.
343 GetOldestRestartPoint(&restartRedoPtr, &restartTli);
344 XLByteToSeg(restartRedoPtr, restartSegNo, wal_segment_size);
345 XLogFileName(lastRestartPointFname, restartTli, restartSegNo,
349 * construct the command to be executed
351 dp = xlogRecoveryCmd;
352 endp = xlogRecoveryCmd + MAXPGPATH - 1;
355 for (sp = command; *sp; sp++)
362 /* %r: filename of last restartpoint */
364 StrNCpy(dp, lastRestartPointFname, endp - dp);
368 /* convert %% to a single % */
374 /* otherwise treat the % as not special */
389 (errmsg_internal("executing %s \"%s\"", commandName, command)));
392 * execute the constructed command
394 rc = system(xlogRecoveryCmd);
398 * If the failure was due to any sort of signal, it's best to punt and
399 * abort recovery. See comments in RestoreArchivedFile().
401 ereport((failOnSignal && wait_result_is_any_signal(rc, true)) ? FATAL : WARNING,
403 translator: First %s represents a postgresql.conf parameter name like
404 "recovery_end_command", the 2nd is the value of that parameter, the
405 third an already translated error message. */
406 (errmsg("%s \"%s\": %s", commandName,
407 command, wait_result_to_str(rc))));
413 * A file was restored from the archive under a temporary filename (path),
414 * and now we want to keep it. Rename it under the permanent filename in
415 * pg_wal (xlogfname), replacing any existing file with the same name.
418 KeepFileRestoredFromArchive(const char *path, const char *xlogfname)
420 char xlogfpath[MAXPGPATH];
/*
 * Overview of the visible steps.
 *
 * xlogfpath is XLOGDIR/<xlogfname>.  When stat() finds a file already at
 * that path, it is removed first: one visible variant renames it to
 * "<xlogfpath>.deleted<N>" (N comes from a static counter that is
 * incremented on each use) and then unlinks the renamed file; the other
 * variant copies xlogfpath into oldpath and unlinks that directly.  The
 * Windows remark further down suggests these are platform alternatives,
 * but the preprocessor lines selecting between them are not visible --
 * confirm against the full file.
 *
 * The restored file is then moved into place with durable_rename(), called
 * with ERROR as its reporting level.  Afterwards the archive status is
 * updated: XLogArchiveForceDone() when XLogArchiveMode is not
 * ARCHIVE_MODE_ALWAYS, and XLogArchiveNotify() presumably in the opposite
 * case (the else keyword is not visible).  WalSndRqstFileReload() asks
 * walsenders to reload a currently-open segment.
 */
424 snprintf(xlogfpath, MAXPGPATH, XLOGDIR "/%s", xlogfname);
426 if (stat(xlogfpath, &statbuf) == 0)
428 char oldpath[MAXPGPATH];
431 static unsigned int deletedcounter = 1;
434 * On Windows, if another process (e.g a walsender process) holds the
435 * file open in FILE_SHARE_DELETE mode, unlink will succeed, but the
436 * file will still show up in directory listing until the last handle
437 * is closed, and we cannot rename the new file in its place until
438 * that. To avoid that problem, rename the old file to a temporary
439 * name first. Use a counter to create a unique filename, because the
440 * same file might be restored from the archive multiple times, and a
441 * walsender could still be holding onto an old deleted version of it.
443 snprintf(oldpath, MAXPGPATH, "%s.deleted%u",
444 xlogfpath, deletedcounter++);
445 if (rename(xlogfpath, oldpath) != 0)
448 (errcode_for_file_access(),
449 errmsg("could not rename file \"%s\" to \"%s\": %m",
450 xlogfpath, oldpath)));
453 /* same-size buffers, so this never truncates */
454 strlcpy(oldpath, xlogfpath, MAXPGPATH);
456 if (unlink(oldpath) != 0)
458 (errcode_for_file_access(),
459 errmsg("could not remove file \"%s\": %m",
464 durable_rename(path, xlogfpath, ERROR);
467 * Create .done file forcibly to prevent the restored segment from being
468 * archived again later.
470 if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS)
471 XLogArchiveForceDone(xlogfname);
473 XLogArchiveNotify(xlogfname);
476 * If the existing file was replaced, since walsenders might have it open,
477 * request them to reload a currently-open segment. This is only required
478 * for WAL segments, walsenders don't hold other files open, but there's
479 * no harm in doing this too often, and we don't know what kind of a file
480 * we're dealing with here.
483 WalSndRqstFileReload();
486 * Signal walsender that new WAL has arrived. Again, this isn't necessary
487 * if we restored something other than a WAL segment, but it does no harm
496 * Create an archive notification file
498 * The name of the notification file is the message that will be picked up
499 * by the archiver, e.g. we write 0000000100000001000000C6.ready
500 * and the archiver then knows to archive XLOGDIR/0000000100000001000000C6,
501 * then when complete, rename it to 0000000100000001000000C6.done
504 XLogArchiveNotify(const char *xlog)
506 char archiveStatusPath[MAXPGPATH];
/*
 * Overview of the visible steps.
 *
 * Builds the <xlog>.ready status path with StatusFilePath() and creates the
 * file by opening it for writing through AllocateFile().  Failures are
 * reported with the "could not create" / "could not write" messages that
 * follow; the report level and the call that closes the file are not
 * visible in this extract.  When IsUnderPostmaster is set, the postmaster
 * is signalled with PMSIGNAL_WAKEN_ARCHIVER.
 */
509 /* insert an otherwise empty file called <XLOG>.ready */
510 StatusFilePath(archiveStatusPath, xlog, ".ready");
511 fd = AllocateFile(archiveStatusPath, "w");
515 (errcode_for_file_access(),
516 errmsg("could not create archive status file \"%s\": %m",
517 archiveStatusPath)));
523 (errcode_for_file_access(),
524 errmsg("could not write archive status file \"%s\": %m",
525 archiveStatusPath)));
529 /* Notify archiver that it's got something to do */
530 if (IsUnderPostmaster)
531 SendPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER);
535 * Convenience routine to notify using segment number representation of filename
538 XLogArchiveNotifySeg(XLogSegNo segno)
540 char xlog[MAXFNAMELEN];
/*
 * Format the WAL file name for segno on ThisTimeLineID (using
 * wal_segment_size) into the MAXFNAMELEN-sized buffer, then hand that name
 * to XLogArchiveNotify().
 */
542 XLogFileName(xlog, ThisTimeLineID, segno, wal_segment_size);
543 XLogArchiveNotify(xlog);
547 * XLogArchiveForceDone
549 * Emit notification forcibly that an XLOG segment file has been successfully
550 * archived, by creating <XLOG>.done regardless of whether <XLOG>.ready
554 XLogArchiveForceDone(const char *xlog)
556 char archiveReady[MAXPGPATH];
557 char archiveDone[MAXPGPATH];
558 struct stat stat_buf;
/*
 * Overview of the visible steps.  Three cases are checked in order:
 *
 *   - <xlog>.done already exists: nothing more is needed (the early exit
 *     itself is not visible in this extract).
 *   - <xlog>.ready exists: it is renamed to .done with durable_rename() at
 *     WARNING level; the (void) cast shows the result is deliberately
 *     discarded.
 *   - otherwise: an empty <xlog>.done is created by opening it for writing
 *     through AllocateFile(), with the "could not create" / "could not
 *     write" messages covering failures.
 */
561 /* Exit if already known done */
562 StatusFilePath(archiveDone, xlog, ".done");
563 if (stat(archiveDone, &stat_buf) == 0)
566 /* If .ready exists, rename it to .done */
567 StatusFilePath(archiveReady, xlog, ".ready");
568 if (stat(archiveReady, &stat_buf) == 0)
570 (void) durable_rename(archiveReady, archiveDone, WARNING);
574 /* insert an otherwise empty file called <XLOG>.done */
575 fd = AllocateFile(archiveDone, "w");
579 (errcode_for_file_access(),
580 errmsg("could not create archive status file \"%s\": %m",
587 (errcode_for_file_access(),
588 errmsg("could not write archive status file \"%s\": %m",
595 * XLogArchiveCheckDone
597 * This is called when we are ready to delete or recycle an old XLOG segment
598 * file or backup history file. If it is okay to delete it then return true.
599 * If it is not time to delete it, make sure a .ready file exists, and return
602 * If <XLOG>.done exists, then return true; else if <XLOG>.ready exists,
603 * then return false; else create <XLOG>.ready and return false.
605 * The reason we do things this way is so that if the original attempt to
606 * create <XLOG>.ready fails, we'll retry during subsequent checkpoints.
609 XLogArchiveCheckDone(const char *xlog)
611 char archiveStatusPath[MAXPGPATH];
612 struct stat stat_buf;
613 bool inRecovery = RecoveryInProgress();
/*
 * Overview of the visible steps.
 *
 * inRecovery is sampled once from RecoveryInProgress().  Archiving counts
 * as applicable when (XLogArchivingActive() and not in recovery) or
 * (XLogArchivingAlways() and in recovery); the first test below fires when
 * neither holds.  After that the status files are probed with stat() in
 * the order .done, .ready, .done again (the repeat covers the archiver
 * finishing in between), and finally XLogArchiveNotify() is called to
 * retry creating .ready.  The return statements are not visible in this
 * extract.
 */
616 * The file is always deletable if archive_mode is "off". On standbys
617 * archiving is disabled if archive_mode is "on", and enabled with
618 * "always". On a primary, archiving is enabled if archive_mode is "on"
621 if (!((XLogArchivingActive() && !inRecovery) ||
622 (XLogArchivingAlways() && inRecovery)))
625 /* First check for .done --- this means archiver is done with it */
626 StatusFilePath(archiveStatusPath, xlog, ".done");
627 if (stat(archiveStatusPath, &stat_buf) == 0)
630 /* check for .ready --- this means archiver is still busy with it */
631 StatusFilePath(archiveStatusPath, xlog, ".ready");
632 if (stat(archiveStatusPath, &stat_buf) == 0)
635 /* Race condition --- maybe archiver just finished, so recheck */
636 StatusFilePath(archiveStatusPath, xlog, ".done");
637 if (stat(archiveStatusPath, &stat_buf) == 0)
640 /* Retry creation of the .ready file */
641 XLogArchiveNotify(xlog);
648 * Check to see if an XLOG segment file is still unarchived.
649 * This is almost but not quite the inverse of XLogArchiveCheckDone: in
650 * the first place we aren't chartered to recreate the .ready file, and
651 * in the second place we should consider that if the file is already gone
652 * then it's not busy. (This check is needed to handle the race condition
653 * that a checkpoint already deleted the no-longer-needed file.)
656 XLogArchiveIsBusy(const char *xlog)
658 char archiveStatusPath[MAXPGPATH];
659 struct stat stat_buf;
/*
 * Overview of the visible steps.
 *
 * Probes <xlog>.done, then <xlog>.ready, then <xlog>.done again, each with
 * stat().  After those probes, archiveStatusPath is reused to hold
 * XLOGDIR/<xlog> and the WAL file itself is checked; a failing stat()
 * there is one half of the final condition (the other half follows the &&
 * and is not visible in this extract).  The return statements are not
 * visible either.
 */
661 /* First check for .done --- this means archiver is done with it */
662 StatusFilePath(archiveStatusPath, xlog, ".done");
663 if (stat(archiveStatusPath, &stat_buf) == 0)
666 /* check for .ready --- this means archiver is still busy with it */
667 StatusFilePath(archiveStatusPath, xlog, ".ready");
668 if (stat(archiveStatusPath, &stat_buf) == 0)
671 /* Race condition --- maybe archiver just finished, so recheck */
672 StatusFilePath(archiveStatusPath, xlog, ".done");
673 if (stat(archiveStatusPath, &stat_buf) == 0)
677 * Check to see if the WAL file has been removed by checkpoint, which
678 * implies it has already been archived, and explains why we can't see a
679 * status file for it.
681 snprintf(archiveStatusPath, MAXPGPATH, XLOGDIR "/%s", xlog);
682 if (stat(archiveStatusPath, &stat_buf) != 0 &&
690 * XLogArchiveIsReadyOrDone
692 * Check to see if an XLOG segment file has a .ready or .done file.
693 * This is similar to XLogArchiveIsBusy(), but returns true if the file
694 * is already archived or is about to be archived.
696 * This is currently only used at recovery. During normal operation this
697 * would be racy: the file might get removed or marked with .ready as we're
698 * checking it, or immediately after we return.
701 XLogArchiveIsReadyOrDone(const char *xlog)
703 char archiveStatusPath[MAXPGPATH];
704 struct stat stat_buf;
/*
 * Overview of the visible steps.
 *
 * Probes <xlog>.done, then <xlog>.ready, then <xlog>.done once more, each
 * with stat() on a path built by StatusFilePath().  The second .done probe
 * covers the window in which the archiver finishes between the first two
 * checks.  The return statements are not visible in this extract.
 */
706 /* First check for .done --- this means archiver is done with it */
707 StatusFilePath(archiveStatusPath, xlog, ".done");
708 if (stat(archiveStatusPath, &stat_buf) == 0)
711 /* check for .ready --- this means archiver is still busy with it */
712 StatusFilePath(archiveStatusPath, xlog, ".ready");
713 if (stat(archiveStatusPath, &stat_buf) == 0)
716 /* Race condition --- maybe archiver just finished, so recheck */
717 StatusFilePath(archiveStatusPath, xlog, ".done");
718 if (stat(archiveStatusPath, &stat_buf) == 0)
727 * Check to see if an XLOG segment file has an archive notification (.ready)
731 XLogArchiveIsReady(const char *xlog)
733 char archiveStatusPath[MAXPGPATH];
734 struct stat stat_buf;
/*
 * Build the <xlog>.ready status path and test for its existence with a
 * single stat() call.  The statements that follow the test are not visible
 * in this extract.
 */
736 StatusFilePath(archiveStatusPath, xlog, ".ready");
737 if (stat(archiveStatusPath, &stat_buf) == 0)
746 * Cleanup archive notification file(s) for a particular xlog segment
749 XLogArchiveCleanup(const char *xlog)
751 char archiveStatusPath[MAXPGPATH];
/*
 * Remove the <xlog>.done and <xlog>.ready status files with unlink(),
 * reusing one path buffer for both.  Neither return value is checked, so a
 * failed unlink() (including one for a file that does not exist) is
 * silent; the existing remarks below leave open whether it should be
 * reported.
 */
753 /* Remove the .done file */
754 StatusFilePath(archiveStatusPath, xlog, ".done");
755 unlink(archiveStatusPath);
756 /* should we complain about failure? */
758 /* Remove the .ready file if present --- normally it shouldn't be */
759 StatusFilePath(archiveStatusPath, xlog, ".ready");
760 unlink(archiveStatusPath);
761 /* should we complain about failure? */