1 /*-------------------------------------------------------------------------
4 * Functions for archiving WAL files and restoring from the archive.
7 * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
10 * src/backend/access/transam/xlogarchive.c
12 *-------------------------------------------------------------------------
17 #include <sys/types.h>
23 #include "access/xlog_internal.h"
24 #include "miscadmin.h"
25 #include "postmaster/startup.h"
26 #include "storage/fd.h"
27 #include "storage/ipc.h"
28 #include "storage/lwlock.h"
29 #include "storage/pmsignal.h"
/*
 * RestoreArchivedFile: fetch one file from the archive via restore_command.
 *
 * NOTE(review): this listing is lossy.  Every line keeps its original file
 * line number as a prefix, and the gaps in that numbering show that lines
 * are absent (return type, braces, several declarations, return statements,
 * the branch structure of the command-building loop).  The outline below
 * states only what the visible lines establish; everything else is marked
 * as not visible.
 *
 * Visible parameters:
 *   path          output buffer: receives xlogpath (strcpy) next to the
 *                 log message for a restored file, and XLOGDIR/xlogfname
 *                 (snprintf bounded by MAXPGPATH) at the end of the function.
 *   xlogfname     name of the wanted file; copied in for %f and used in
 *                 the log and error messages.
 *   recovername   file name under XLOGDIR used to build xlogpath, which is
 *                 copied in for %p and passed through make_native_path().
 *   expectedSize  when > 0, compared against st_size of the restored file.
 *   The parameter list continues on a line that is not visible.
 *
 * Visible sequence:
 *   1. recoveryRestoreCommand is tested for NULL (action taken not visible).
 *   2. xlogpath is built as XLOGDIR/recovername; it is checked with stat()
 *      and removed with unlink(), and a failure of either call is reported
 *      with errcode_for_file_access().
 *   3. lastRestartPointFname is filled either from GetOldestRestartPoint(),
 *      XLByteToSeg() and XLogFileName(), followed by an Assert that it does
 *      not sort after xlogfname, or from XLogFileName(..., 0, 0L).  It is
 *      copied in for %r.  The condition choosing between the two is not
 *      visible.
 *   4. recoveryRestoreCommand is walked character by character to build
 *      xlogRestoreCmd; endp marks the last byte of that MAXPGPATH-sized
 *      buffer and bounds each StrNCpy().
 *   5. The command is run with system(), followed by PostRestoreCommand().
 *   6. The result is checked with stat(): a size different from a positive
 *      expectedSize leads to the wrong-size report, with a separate test
 *      for StandbyMode combined with a too-small file.  The visible lines
 *      also log a restored file and copy xlogpath into path; the branch
 *      structure joining these is not visible.
 *   7. A child terminated by SIGTERM is tested for first (action not
 *      visible).  Then signaled is set when the child died on a signal or
 *      exited with a status above 125, and it selects FATAL instead of
 *      DEBUG2 for the failure report.
 *   8. path is set to XLOGDIR/xlogfname.
 */
32 * Attempt to retrieve the specified file from off-line archival storage.
33 * If successful, fill "path" with its complete path (note that this will be
34 * a temp file name that doesn't follow the normal naming convention), and
37 * If not successful, fill "path" with the name of the normal on-line file
38 * (which may or may not actually exist, but we'll try to use it), and return
41 * For fixed-size files, the caller may pass the expected size as an
42 * additional crosscheck on successful recovery. If the file size is not
43 * known, set expectedSize = 0.
45 * When 'cleanupEnabled' is false, refrain from deleting any old WAL segments
46 * in the archive. This is used when fetching the initial checkpoint record,
47 * when we are not yet sure how far back we need the WAL.
50 RestoreArchivedFile(char *path, const char *xlogfname,
51 const char *recovername, off_t expectedSize,
54 char xlogpath[MAXPGPATH];
55 char xlogRestoreCmd[MAXPGPATH];
56 char lastRestartPointFname[MAXPGPATH];
63 XLogSegNo restartSegNo;
64 XLogRecPtr restartRedoPtr;
65 TimeLineID restartTli;
67 /* In standby mode, restore_command might not be supplied */
68 if (recoveryRestoreCommand == NULL)
72 * When doing archive recovery, we always prefer an archived log file even
73 * if a file of the same name exists in XLOGDIR. The reason is that the
74 * file in XLOGDIR could be an old, un-filled or partly-filled version
75 * that was copied and restored as part of backing up $PGDATA.
77 * We could try to optimize this slightly by checking the local copy
78 * lastchange timestamp against the archived copy, but we have no API to
79 * do this, nor can we guarantee that the lastchange timestamp was
80 * preserved correctly when we copied to archive. Our aim is robustness,
81 * so we elect not to do this.
83 * If we cannot obtain the log file from the archive, however, we will try
84 * to use the XLOGDIR file if it exists. This is so that we can make use
85 * of log segments that weren't yet transferred to the archive.
87 * Notice that we don't actually overwrite any files when we copy back
88 * from archive because the restore_command may inadvertently
89 * restore inappropriate xlogs, or they may be corrupt, so we may wish to
90 * fallback to the segments remaining in current XLOGDIR later. The
91 * copy-from-archive filename is always the same, ensuring that we don't
92 * run out of disk space on long recoveries.
94 snprintf(xlogpath, MAXPGPATH, XLOGDIR "/%s", recovername);
97 * Make sure there is no existing file named recovername.
99 if (stat(xlogpath, &stat_buf) != 0)
103 (errcode_for_file_access(),
104 errmsg("could not stat file \"%s\": %m",
109 if (unlink(xlogpath) != 0)
111 (errcode_for_file_access(),
112 errmsg("could not remove file \"%s\": %m",
117 * Calculate the archive file cutoff point for use during log shipping
118 * replication. All files earlier than this point can be deleted from the
119 * archive, though there is no requirement to do so.
121 * If cleanup is not enabled, initialise this with the filename of
122 * InvalidXLogRecPtr, which will prevent the deletion of any WAL files
123 * from the archive because of the alphabetic sorting property of WAL
126 * Once we have successfully located the redo pointer of the checkpoint
127 * from which we start recovery we never request a file prior to the redo
128 * pointer of the last restartpoint. When redo begins we know that we have
129 * successfully located it, so there is no need for additional status
130 * flags to signify the point when we can begin deleting WAL files from
135 GetOldestRestartPoint(&restartRedoPtr, &restartTli);
136 XLByteToSeg(restartRedoPtr, restartSegNo);
137 XLogFileName(lastRestartPointFname, restartTli, restartSegNo);
138 /* we shouldn't need anything earlier than last restart point */
139 Assert(strcmp(lastRestartPointFname, xlogfname) <= 0);
142 XLogFileName(lastRestartPointFname, 0, 0L);
145 * construct the command to be executed
148 endp = xlogRestoreCmd + MAXPGPATH - 1;
151 for (sp = recoveryRestoreCommand; *sp; sp++)
158 /* %p: relative path of target file */
160 StrNCpy(dp, xlogpath, endp - dp);
161 make_native_path(dp);
165 /* %f: filename of desired file */
167 StrNCpy(dp, xlogfname, endp - dp);
171 /* %r: filename of last restartpoint */
173 StrNCpy(dp, lastRestartPointFname, endp - dp);
177 /* convert %% to a single % */
183 /* otherwise treat the % as not special */
198 (errmsg_internal("executing restore command \"%s\"",
202 * Check signals before restore command and reset afterwards.
207 * Copy xlog from archival storage to XLOGDIR
209 rc = system(xlogRestoreCmd);
211 PostRestoreCommand();
216 * command apparently succeeded, but let's make sure the file is
217 * really there now and has the correct size.
219 if (stat(xlogpath, &stat_buf) == 0)
221 if (expectedSize > 0 && stat_buf.st_size != expectedSize)
226 * If we find a partial file in standby mode, we assume it's
227 * because it's just being copied to the archive, and keep
230 * Otherwise treat a wrong-sized file as FATAL to ensure the
231 * DBA would notice it, but is that too strong? We could try
232 * to plow ahead with a local copy of the file ... but the
233 * problem is that there probably isn't one, and we'd
234 * incorrectly conclude we've reached the end of WAL and we're
235 * done recovering ...
237 if (StandbyMode && stat_buf.st_size < expectedSize)
242 (errmsg("archive file \"%s\" has wrong size: %lu instead of %lu",
244 (unsigned long) stat_buf.st_size,
245 (unsigned long) expectedSize)));
251 (errmsg("restored log file \"%s\" from archive",
253 strcpy(path, xlogpath);
262 (errcode_for_file_access(),
263 errmsg("could not stat file \"%s\": %m",
269 * Remember, we rollforward UNTIL the restore fails so failure here is
270 * just part of the process... that makes it difficult to determine
271 * whether the restore failed because there isn't an archive to restore,
272 * or because the administrator has specified the restore program
273 * incorrectly. We have to assume the former.
275 * However, if the failure was due to any sort of signal, it's best to
276 * punt and abort recovery. (If we "return false" here, upper levels will
277 * assume that recovery is complete and start up the database!) It's
278 * essential to abort on child SIGINT and SIGQUIT, because per spec
279 * system() ignores SIGINT and SIGQUIT while waiting; if we see one of
280 * those it's a good bet we should have gotten it too.
282 * On SIGTERM, assume we have received a fast shutdown request, and exit
283 * cleanly. It's pure chance whether we receive the SIGTERM first, or the
284 * child process. If we receive it first, the signal handler will call
285 * proc_exit, otherwise we do it here. If we or the child process received
286 * SIGTERM for any other reason than a fast shutdown request, postmaster
287 * will perform an immediate shutdown when it sees us exiting
290 * Per the Single Unix Spec, shells report exit status > 128 when a called
291 * command died on a signal. Also, 126 and 127 are used to report
292 * problems such as an unfindable command; treat those as fatal errors
295 if (WIFSIGNALED(rc) && WTERMSIG(rc) == SIGTERM)
298 signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
300 ereport(signaled ? FATAL : DEBUG2,
301 (errmsg("could not restore file \"%s\" from archive: return code %d",
307 * if an archived file is not available, there might still be a version of
308 * this file in XLOGDIR, so return that as the filename to open.
310 * In many recovery scenarios we expect this to fail also, but if so that
311 * just means we've reached the end of WAL.
313 snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname);
/*
 * ExecuteRecoveryCommand: expand a command template and run it with system().
 *
 * NOTE(review): this listing is lossy.  Lines keep their original file line
 * numbers and the gaps show missing lines (return type, braces, some
 * declarations, the branch structure of the expansion loop).  Only facts
 * established by the visible lines are stated here.
 *
 * Visible behaviour:
 *   - Asserts that command and commandName are both non-NULL.
 *   - Derives lastRestartPointFname from GetOldestRestartPoint(),
 *     XLByteToSeg() and XLogFileName(); it is copied in for %r.
 *   - Walks command character by character, writing through dp into
 *     xlogRecoveryCmd; endp marks the last byte of that MAXPGPATH-sized
 *     buffer and bounds the StrNCpy().
 *   - The executing message prints commandName together with command, that
 *     is the template as passed in, not the expanded xlogRecoveryCmd.
 *   - Runs xlogRecoveryCmd with system() and keeps the result in rc.
 *   - signaled is set when the child died on a signal or exited with a
 *     status above 125.  The report uses FATAL only when signaled and
 *     failOnSignal are both true, and WARNING otherwise.  Whether the
 *     report is limited to a nonzero rc is not visible here.
 */
318 * Attempt to execute an external shell command during recovery.
320 * 'command' is the shell command to be executed, 'commandName' is a
321 * human-readable name describing the command emitted in the logs. If
322 * 'failOnSignal' is true and the command is killed by a signal, a FATAL
323 * error is thrown. Otherwise a WARNING is emitted.
325 * This is currently used for recovery_end_command and archive_cleanup_command.
328 ExecuteRecoveryCommand(char *command, char *commandName, bool failOnSignal)
330 char xlogRecoveryCmd[MAXPGPATH];
331 char lastRestartPointFname[MAXPGPATH];
337 XLogSegNo restartSegNo;
338 XLogRecPtr restartRedoPtr;
339 TimeLineID restartTli;
341 Assert(command && commandName);
344 * Calculate the archive file cutoff point for use during log shipping
345 * replication. All files earlier than this point can be deleted from the
346 * archive, though there is no requirement to do so.
348 GetOldestRestartPoint(&restartRedoPtr, &restartTli);
349 XLByteToSeg(restartRedoPtr, restartSegNo);
350 XLogFileName(lastRestartPointFname, restartTli, restartSegNo);
353 * construct the command to be executed
355 dp = xlogRecoveryCmd;
356 endp = xlogRecoveryCmd + MAXPGPATH - 1;
359 for (sp = command; *sp; sp++)
366 /* %r: filename of last restartpoint */
368 StrNCpy(dp, lastRestartPointFname, endp - dp);
372 /* convert %% to a single % */
378 /* otherwise treat the % as not special */
393 (errmsg_internal("executing %s \"%s\"", commandName, command)));
396 * execute the constructed command
398 rc = system(xlogRecoveryCmd);
402 * If the failure was due to any sort of signal, it's best to punt and
403 * abort recovery. See also detailed comments on signals in
404 * RestoreArchivedFile().
406 signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
408 ereport((signaled && failOnSignal) ? FATAL : WARNING,
410 translator: First %s represents a recovery.conf parameter name like
411 "recovery_end_command", and the 2nd is the value of that parameter. */
412 (errmsg("%s \"%s\": return code %d", commandName,
/*
 * XLogArchiveNotify: create the .ready status file for xlog and signal the
 * postmaster to wake the archiver.
 *
 * NOTE(review): this listing is lossy.  Lines keep their original file line
 * numbers and the gaps show missing lines: the return type, the braces, the
 * declaration of fd, and the conditions and error levels around the two
 * error reports are not visible.
 *
 * Visible behaviour:
 *   - StatusFilePath() builds the .ready path for xlog in archiveStatusPath,
 *     a MAXPGPATH-sized local buffer.
 *   - The file is opened through AllocateFile() in write mode; no write call
 *     appears in the visible lines, matching the existing remark that the
 *     file is otherwise empty.
 *   - Two reports exist, one for failing to create and one for failing to
 *     write the status file; both use errcode_for_file_access() and name
 *     archiveStatusPath.
 *   - Only when IsUnderPostmaster is true is PMSIGNAL_WAKEN_ARCHIVER sent
 *     with SendPostmasterSignal().
 */
421 * Create an archive notification file
423 * The name of the notification file is the message that will be picked up
424 * by the archiver, e.g. we write 0000000100000001000000C6.ready
425 * and the archiver then knows to archive XLOGDIR/0000000100000001000000C6,
426 * then when complete, rename it to 0000000100000001000000C6.done
429 XLogArchiveNotify(const char *xlog)
431 char archiveStatusPath[MAXPGPATH];
434 /* insert an otherwise empty file called <XLOG>.ready */
435 StatusFilePath(archiveStatusPath, xlog, ".ready");
436 fd = AllocateFile(archiveStatusPath, "w");
440 (errcode_for_file_access(),
441 errmsg("could not create archive status file \"%s\": %m",
442 archiveStatusPath)));
448 (errcode_for_file_access(),
449 errmsg("could not write archive status file \"%s\": %m",
450 archiveStatusPath)));
454 /* Notify archiver that it's got something to do */
455 if (IsUnderPostmaster)
456 SendPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER);
/*
 * XLogArchiveNotifySeg: format the file name for segment segno on
 * ThisTimeLineID with XLogFileName() into a MAXFNAMELEN-sized local buffer,
 * then pass that name to XLogArchiveNotify().
 *
 * NOTE(review): this listing is lossy.  Lines keep their original file line
 * numbers; the return type and the braces of this function are not visible.
 */
460 * Convenience routine to notify using segment number representation of filename
463 XLogArchiveNotifySeg(XLogSegNo segno)
465 char xlog[MAXFNAMELEN];
467 XLogFileName(xlog, ThisTimeLineID, segno);
468 XLogArchiveNotify(xlog);
/*
 * NOTE(review): this listing is lossy.  Lines keep their original file line
 * numbers and the gaps show missing lines: the return type, the braces and
 * the statement executed after each test below are not visible.  They are
 * presumably the returns described in the existing comment that follows;
 * confirm against the complete file.
 *
 * Visible order of checks in XLogArchiveCheckDone:
 *   1. XLogArchivingActive() is false.
 *   2. stat() succeeds on the .done status file of xlog.
 *   3. stat() succeeds on the .ready status file of xlog.
 *   4. stat() succeeds on the .done status file again, to catch an archiver
 *      that finished between steps 2 and 3.
 *   5. XLogArchiveNotify(xlog) is called to create the .ready file again.
 * archiveStatusPath is a single MAXPGPATH-sized buffer that is refilled by
 * StatusFilePath() before every stat() call.
 */
472 * XLogArchiveCheckDone
474 * This is called when we are ready to delete or recycle an old XLOG segment
475 * file or backup history file. If it is okay to delete it then return true.
476 * If it is not time to delete it, make sure a .ready file exists, and return
479 * If <XLOG>.done exists, then return true; else if <XLOG>.ready exists,
480 * then return false; else create <XLOG>.ready and return false.
482 * The reason we do things this way is so that if the original attempt to
483 * create <XLOG>.ready fails, we'll retry during subsequent checkpoints.
486 XLogArchiveCheckDone(const char *xlog)
488 char archiveStatusPath[MAXPGPATH];
489 struct stat stat_buf;
491 /* Always deletable if archiving is off */
492 if (!XLogArchivingActive())
495 /* First check for .done --- this means archiver is done with it */
496 StatusFilePath(archiveStatusPath, xlog, ".done");
497 if (stat(archiveStatusPath, &stat_buf) == 0)
500 /* check for .ready --- this means archiver is still busy with it */
501 StatusFilePath(archiveStatusPath, xlog, ".ready");
502 if (stat(archiveStatusPath, &stat_buf) == 0)
505 /* Race condition --- maybe archiver just finished, so recheck */
506 StatusFilePath(archiveStatusPath, xlog, ".done");
507 if (stat(archiveStatusPath, &stat_buf) == 0)
510 /* Retry creation of the .ready file */
511 XLogArchiveNotify(xlog);
/*
 * NOTE(review): this listing is lossy.  Lines keep their original file line
 * numbers and the gaps show missing lines: the return type, the braces and
 * the statement executed after each test are not visible, and the last
 * condition is cut off after its && operator.
 *
 * Visible order of checks in XLogArchiveIsBusy:
 *   1. stat() succeeds on the .done status file of xlog.
 *   2. stat() succeeds on the .ready status file of xlog.
 *   3. stat() succeeds on the .done status file again, to catch an archiver
 *      that finished between steps 1 and 2.
 *   4. archiveStatusPath is reused to hold XLOGDIR/xlog, the segment file
 *      itself, and the test begins with stat() failing on that path; the
 *      rest of the condition is on a line that is not visible.
 * Unlike XLogArchiveCheckDone, no call to XLogArchiveNotify() appears here.
 */
518 * Check to see if an XLOG segment file is still unarchived.
519 * This is almost but not quite the inverse of XLogArchiveCheckDone: in
520 * the first place we aren't chartered to recreate the .ready file, and
521 * in the second place we should consider that if the file is already gone
522 * then it's not busy. (This check is needed to handle the race condition
523 * that a checkpoint already deleted the no-longer-needed file.)
526 XLogArchiveIsBusy(const char *xlog)
528 char archiveStatusPath[MAXPGPATH];
529 struct stat stat_buf;
531 /* First check for .done --- this means archiver is done with it */
532 StatusFilePath(archiveStatusPath, xlog, ".done");
533 if (stat(archiveStatusPath, &stat_buf) == 0)
536 /* check for .ready --- this means archiver is still busy with it */
537 StatusFilePath(archiveStatusPath, xlog, ".ready");
538 if (stat(archiveStatusPath, &stat_buf) == 0)
541 /* Race condition --- maybe archiver just finished, so recheck */
542 StatusFilePath(archiveStatusPath, xlog, ".done");
543 if (stat(archiveStatusPath, &stat_buf) == 0)
547 * Check to see if the WAL file has been removed by checkpoint, which
548 * implies it has already been archived, and explains why we can't see a
549 * status file for it.
551 snprintf(archiveStatusPath, MAXPGPATH, XLOGDIR "/%s", xlog);
552 if (stat(archiveStatusPath, &stat_buf) != 0 &&
/*
 * XLogArchiveCleanup: remove the .done and then the .ready status file of
 * xlog.  Each path is built by StatusFilePath() into the same MAXPGPATH-sized
 * buffer and passed to unlink().  The result of unlink() is not examined in
 * either case, so a failure to remove a file goes unreported; the existing
 * remarks below leave open whether it should be.
 *
 * NOTE(review): this listing is lossy.  Lines keep their original file line
 * numbers; the return type and the braces of this function are not visible.
 */
562 * Cleanup archive notification file(s) for a particular xlog segment
565 XLogArchiveCleanup(const char *xlog)
567 char archiveStatusPath[MAXPGPATH];
569 /* Remove the .done file */
570 StatusFilePath(archiveStatusPath, xlog, ".done");
571 unlink(archiveStatusPath);
572 /* should we complain about failure? */
574 /* Remove the .ready file if present --- normally it shouldn't be */
575 StatusFilePath(archiveStatusPath, xlog, ".ready");
576 unlink(archiveStatusPath);
577 /* should we complain about failure? */