1 /*-------------------------------------------------------------------------
5 * PostgreSQL WAL archiver
7 * All functions relating to archiver are included here
9 * - All functions executed by archiver process
11 * - archiver is forked from postmaster, and the two
12 * processes then communicate using signals. All functions
13 * executed by postmaster are included in this file.
15 * Initial author: Simon Riggs simon@2ndquadrant.com
17 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
18 * Portions Copyright (c) 1994, Regents of the University of California
22 * $PostgreSQL: pgsql/src/backend/postmaster/pgarch.c,v 1.28 2007/01/05 22:19:36 momjian Exp $
24 *-------------------------------------------------------------------------
35 #include "access/xlog_internal.h"
36 #include "libpq/pqsignal.h"
37 #include "miscadmin.h"
38 #include "postmaster/fork_process.h"
39 #include "postmaster/pgarch.h"
40 #include "postmaster/postmaster.h"
41 #include "storage/fd.h"
42 #include "storage/ipc.h"
43 #include "storage/pg_shmem.h"
44 #include "storage/pmsignal.h"
45 #include "utils/guc.h"
46 #include "utils/ps_status.h"
53 #define PGARCH_AUTOWAKE_INTERVAL 60 /* How often to force a poll of the
54 * archive status directory; in seconds. */
56 #define PGARCH_RESTART_INTERVAL 10 /* How often to attempt to restart a
57 * failed archiver; in seconds. */
60 * Archiver control info.
62 * We expect that archivable files within pg_xlog will have names between
63 * MIN_XFN_CHARS and MAX_XFN_CHARS in length, consisting only of characters
64 * appearing in VALID_XFN_CHARS. The status files in archive_status have
65 * corresponding names with ".ready" or ".done" appended.
/*
 * Limits used by the status-directory scan in pgarch_readyXlog when deciding
 * whether a directory entry names an archivable file.  The length limits are
 * applied to the entry name with its 6-character suffix removed.
 */
68 #define MIN_XFN_CHARS 16
69 #define MAX_XFN_CHARS 40
/* Used as a character set with strspn(), not as a pattern. */
70 #define VALID_XFN_CHARS "0123456789ABCDEF.history.backup"
/* Failure count for a single file at which the copy loop gives up for now. */
72 #define NUM_ARCHIVE_RETRIES 3
/*
 * Time of the most recent launch attempt.  The launch routine compares the
 * elapsed time against PGARCH_RESTART_INTERVAL before trying again.
 */
79 static time_t last_pgarch_start_time;
82 * Flags set by interrupt handlers for later service in the main loop.
/*
 * Both flags are polled by the wait condition in pgarch_MainLoop.
 * NOTE(review): the statements that set and clear them are not present in
 * this view.
 */
84 static volatile sig_atomic_t got_SIGHUP = false;
85 static volatile sig_atomic_t wakened = false;
88 * Local function forward declarations
/*
 * Prototypes for the routines defined later in this file.
 *
 * NOTE(review): the definition of pgarch_forkexec is followed by an #endif
 * tagged EXEC_BACKEND, so this first prototype is presumably guarded by the
 * same conditional; the directive lines are not present in this view.
 */
92 static pid_t pgarch_forkexec(void);
/* Process entry point; the three handlers after it are installed there. */
95 NON_EXEC_STATIC void PgArchiverMain(int argc, char *argv[]);
96 static void pgarch_exit(SIGNAL_ARGS);
97 static void ArchSigHupHandler(SIGNAL_ARGS);
98 static void pgarch_waken(SIGNAL_ARGS);
/* Archiving work: outer service loop, per-wakeup copy loop, and helpers. */
99 static void pgarch_MainLoop(void);
100 static void pgarch_ArchiverCopyLoop(void);
101 static bool pgarch_archiveXlog(char *xlog);
102 static bool pgarch_readyXlog(char *xlog);
103 static void pgarch_archiveDone(char *xlog);
106 /* ------------------------------------------------------------
107 * Public functions called from postmaster follow
108 * ------------------------------------------------------------
/*
 * Launch routine used by the postmaster.  NOTE(review): the function header
 * line and its local declarations are not present in this view.
 *
 * Visible behaviour, in order:
 *   - nothing is attempted unless XLogArchivingActive() reports true;
 *   - the elapsed time since last_pgarch_start_time is tested against
 *     PGARCH_RESTART_INTERVAL, and last_pgarch_start_time is then set to
 *     the current time;
 *   - a child is created and its pid is stored in pgArchPid;
 *   - the child closes the postmaster ports, detaches from shared memory
 *     and calls PgArchiverMain(0, NULL);
 *   - the parent returns pgArchPid cast to int.
 *
 * NOTE(review): the statements controlled by the two early tests and the
 * case labels of the switch are not present in this view; the notes below
 * say 0 is returned on failure -- confirm.
 */
114 * Called from postmaster at startup or after an existing archiver
115 * died. Attempt to fire up a fresh archiver process.
117 * Returns PID of child process, or 0 if fail.
119 * Note: if fail, we will be called again from the postmaster main loop.
128 * Do nothing if no archiver needed
130 if (!XLogArchivingActive())
134 * Do nothing if too soon since last archiver start. This is a safety
135 * valve to protect against continuous respawn attempts if the archiver is
136 * dying immediately at launch. Note that since we will be re-called from
137 * the postmaster main loop, we will get another chance later.
139 curtime = time(NULL);
/*
 * Both sides are converted to unsigned int before the comparison, so a
 * negative difference becomes a very large value and does not satisfy the
 * less-than test.  NOTE(review): presumably intended to cope with the clock
 * stepping backwards -- confirm.
 */
140 if ((unsigned int) (curtime - last_pgarch_start_time) <
141 (unsigned int) PGARCH_RESTART_INTERVAL)
143 last_pgarch_start_time = curtime;
/*
 * NOTE(review): the next two switch heads look like build alternatives
 * (pgarch_forkexec versus fork_process).  The preprocessor lines that would
 * select between them are not present in this view -- confirm.
 */
146 switch ((pgArchPid = pgarch_forkexec()))
148 switch ((pgArchPid = fork_process()))
153 (errmsg("could not fork archiver: %m")));
158 /* in postmaster child ... */
159 /* Close the postmaster's sockets */
160 ClosePostmasterPorts(false);
162 /* Lose the postmaster's on-exit routines */
165 /* Drop our connection to postmaster's shared memory, as well */
166 PGSharedMemoryDetach();
168 PgArchiverMain(0, NULL);
173 return (int) pgArchPid;
176 /* shouldn't get here */
180 /* ------------------------------------------------------------
181 * Local functions called by archiver follow
182 * ------------------------------------------------------------
189 * pgarch_forkexec() -
191 * Format up the arglist for, then fork and exec, archive process
/*
 * Build the argument vector for the archiver child and pass it to
 * postmaster_forkexec(), returning the result of that call.
 *
 * The visible slots are the program name, the --forkarch switch and one
 * placeholder entry.
 *
 * NOTE(review): the return type line and the declarations of av and ac are
 * not present in this view (the prototype gives the return type as pid_t).
 */
194 pgarch_forkexec(void)
199 av[ac++] = "postgres";
201 av[ac++] = "--forkarch";
203 av[ac++] = NULL; /* filled in by postmaster_forkexec */
/* Sanity check that the number of slots used stays below the size of av. */
206 Assert(ac < lengthof(av));
208 return postmaster_forkexec(ac, av);
210 #endif /* EXEC_BACKEND */
216 * The argc/argv parameters are valid only in EXEC_BACKEND case. However,
217 * since we don't use 'em, it hardly matters...
/*
 * Entry point of the archiver process.
 *
 * The visible portion sets IsUnderPostmaster, refreshes MyProcPid, installs
 * the signal dispositions listed here and sets the ps title:
 *   SIGHUP   handled by ArchSigHupHandler
 *   SIGQUIT  handled by pgarch_exit
 *   SIGUSR1  handled by pgarch_waken
 *   SIGINT, SIGTERM, SIGALRM, SIGPIPE, SIGUSR2        ignored
 *   SIGCHLD, SIGTTIN, SIGTTOU, SIGCONT, SIGWINCH      default action
 *
 * No visible statement reads argc or argv.
 *
 * NOTE(review): the setsid() call whose failure is reported below, and
 * everything after the ps-title call, are not present in this view.
 */
220 PgArchiverMain(int argc, char *argv[])
222 IsUnderPostmaster = true; /* we are a postmaster subprocess now */
224 MyProcPid = getpid(); /* reset MyProcPid */
227 * If possible, make this process a group leader, so that the postmaster
228 * can signal any child processes too.
232 elog(FATAL, "setsid() failed: %m");
236 * Ignore all signals usually bound to some action in the postmaster,
237 * except for SIGHUP, SIGUSR1 and SIGQUIT.
239 pqsignal(SIGHUP, ArchSigHupHandler);
240 pqsignal(SIGINT, SIG_IGN);
241 pqsignal(SIGTERM, SIG_IGN);
242 pqsignal(SIGQUIT, pgarch_exit);
243 pqsignal(SIGALRM, SIG_IGN);
244 pqsignal(SIGPIPE, SIG_IGN);
245 pqsignal(SIGUSR1, pgarch_waken);
246 pqsignal(SIGUSR2, SIG_IGN);
247 pqsignal(SIGCHLD, SIG_DFL);
248 pqsignal(SIGTTIN, SIG_DFL);
249 pqsignal(SIGTTOU, SIG_DFL);
250 pqsignal(SIGCONT, SIG_DFL);
251 pqsignal(SIGWINCH, SIG_DFL);
/* All dispositions are set; now install the UnBlockSig signal mask. */
252 PG_SETMASK(&UnBlockSig);
255 * Identify myself via ps
257 init_ps_display("archiver process", "", "", "");
/*
 * SIGQUIT signal handler for archiver process (installed in PgArchiverMain).
 *
 * NOTE(review): the handler body is not present in this view.  The notes
 * below give the intent: leave at once rather than finish pending archive
 * copies.
 */
266 pgarch_exit(SIGNAL_ARGS)
269 * For now, we just nail the doors shut and get out of town. It might
270 * seem cleaner to finish up any pending archive copies, but there's a
271 * nontrivial risk that init will kill us partway through.
/*
 * SIGHUP: set flag to re-read config file at next convenient time.
 *
 * Installed for SIGHUP in PgArchiverMain.  NOTE(review): the body is not
 * present in this view; presumably it sets got_SIGHUP, which the wait
 * condition in pgarch_MainLoop polls -- confirm.
 */
278 ArchSigHupHandler(SIGNAL_ARGS)
/*
 * SIGUSR1 signal handler for archiver process.
 *
 * Installed for SIGUSR1 in PgArchiverMain.  NOTE(review): the body is not
 * present in this view; presumably it sets the wakened flag, which the wait
 * condition in pgarch_MainLoop polls -- confirm.
 */
285 pgarch_waken(SIGNAL_ARGS)
293 * Main loop for archiver
/*
 * Service loop of the archiver.
 *
 * Each pass, as far as visible here:
 *   - for a config update, ProcessConfigFile(PGC_SIGHUP) is run and the
 *     loop is left if XLogArchivingActive() is then false;
 *   - pgarch_ArchiverCopyLoop() is run and the time is stored in
 *     last_copy_time;
 *   - the process waits while neither wakened nor got_SIGHUP is set, and
 *     compares the time since last_copy_time with PGARCH_AUTOWAKE_INTERVAL.
 * The outer loop repeats while PostmasterIsAlive(true) holds.
 *
 * NOTE(review): the conditions guarding the reload and copy steps, the
 * resetting of the flags, the sleep call and the statement controlled by the
 * interval test are not present in this view.
 */
296 pgarch_MainLoop(void)
298 time_t last_copy_time = 0;
301 * We run the copy loop immediately upon entry, in case there are
302 * unarchived files left over from a previous database run (or maybe the
303 * archiver died unexpectedly). After that we wait for a signal or
304 * timeout before doing more.
310 /* Check for config update */
314 ProcessConfigFile(PGC_SIGHUP);
315 if (!XLogArchivingActive())
316 break; /* user wants us to shut down */
319 /* Do what we're here for */
323 pgarch_ArchiverCopyLoop();
324 last_copy_time = time(NULL);
328 * There shouldn't be anything for the archiver to do except to wait
329 * for a signal ... however, the archiver exists to protect our data,
330 * so she wakes up occasionally to allow herself to be proactive.
332 * On some platforms, signals won't interrupt the sleep. To ensure we
333 * respond reasonably promptly when someone signals us, break down the
334 * sleep into 1-second increments, and check for interrupts after each
337 while (!(wakened || got_SIGHUP))
342 curtime = time(NULL);
343 if ((unsigned int) (curtime - last_copy_time) >=
344 (unsigned int) PGARCH_AUTOWAKE_INTERVAL)
347 } while (PostmasterIsAlive(true));
351 * pgarch_ArchiverCopyLoop
353 * Archives all outstanding xlogs then returns
/*
 * Archive every file that pgarch_readyXlog() reports, one at a time.
 *
 * For each reported name, pgarch_archiveXlog() is attempted.  On success the
 * file is marked with pgarch_archiveDone() and the retry loop is left.  On
 * failure the failure count is increased; once it reaches
 * NUM_ARCHIVE_RETRIES a message is reported and the function returns,
 * otherwise the attempt is repeated after a one-second pause.
 * PostmasterIsAlive(true) is tested ahead of the archive attempt.
 *
 * NOTE(review): the retry loop header, the declaration of the failures
 * counter and the action taken when the postmaster is gone are not present
 * in this view.
 */
356 pgarch_ArchiverCopyLoop(void)
/* Sized for the longest accepted file name plus the terminator. */
358 char xlog[MAX_XFN_CHARS + 1];
361 * loop through all xlogs with archive_status of .ready and archive
362 * them...mostly we expect this to be a single file, though it is possible
363 * some backend will add files onto the list of those that need archiving
364 * while we are still copying earlier archives
366 while (pgarch_readyXlog(xlog))
372 /* Abandon processing if we notice our postmaster has died */
373 if (!PostmasterIsAlive(true))
376 if (pgarch_archiveXlog(xlog))
379 pgarch_archiveDone(xlog);
380 break; /* out of inner retry loop */
384 if (++failures >= NUM_ARCHIVE_RETRIES)
387 (errmsg("transaction log file \"%s\" could not be archived: too many failures",
389 return; /* give up archiving for now */
391 pg_usleep(1000000L); /* wait a bit before retrying */
400 * Invokes system(3) to copy one archive file to wherever it should go
402 * Returns true if successful
/*
 * Run the configured archive command for one file.
 *
 * xlog is the bare file name.  The command text is built in xlogarchcmd by
 * walking XLogArchiveCommand and substituting:
 *   %p  the path XLOGDIR/xlog, passed through make_native_path
 *   %f  the bare file name
 *   %%  a single percent sign
 * Any other percent sign is copied as an ordinary character.  The finished
 * text is run with system().
 *
 * Returns true if successful (per the notes preceding this function).  When
 * the failure report is issued, its level is FATAL if WIFSIGNALED(rc) holds
 * or WEXITSTATUS(rc) exceeds 128, and LOG otherwise.
 *
 * NOTE(review): the declarations of dp, endp, sp and rc, the body of the
 * substitution switch, the test on rc and the return statements are not
 * present in this view.
 */
405 pgarch_archiveXlog(char *xlog)
407 char xlogarchcmd[MAXPGPATH];
408 char pathname[MAXPGPATH];
414 snprintf(pathname, MAXPGPATH, XLOGDIR "/%s", xlog);
417 * construct the command to be executed
/*
 * endp points at the final byte of xlogarchcmd; the copies below are
 * bounded by the distance endp - dp.
 */
420 endp = xlogarchcmd + MAXPGPATH - 1;
423 for (sp = XLogArchiveCommand; *sp; sp++)
430 /* %p: relative path of source file */
432 StrNCpy(dp, pathname, endp - dp);
433 make_native_path(dp);
437 /* %f: filename of source file */
439 StrNCpy(dp, xlog, endp - dp);
443 /* convert %% to a single % */
449 /* otherwise treat the % as not special */
464 (errmsg_internal("executing archive command \"%s\"",
466 rc = system(xlogarchcmd);
470 * If either the shell itself, or a called command, died on a signal,
471 * abort the archiver. We do this because system() ignores SIGINT and
472 * SIGQUIT while waiting; so a signal is very likely something that
473 * should have interrupted us too. If we overreact it's no big deal,
474 * the postmaster will just start the archiver again.
476 * Per the Single Unix Spec, shells report exit status > 128 when
477 * a called command died on a signal.
479 bool signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 128;
481 ereport(signaled ? FATAL : LOG,
482 (errmsg("archive command \"%s\" failed: return code %d",
488 (errmsg("archived transaction log file \"%s\"", xlog)));
496 * Return name of the oldest xlog file that has not yet been archived.
497 * No notification is set that file archiving is now in progress, so
498 * this would need to be extended if multiple concurrent archival
499 * tasks were created. If a failure occurs, we will completely
500 * re-copy the file at the next available opportunity.
502 * It is important that we return the oldest, so that we archive xlogs
503 * in order that they were written, for two reasons:
504 * 1) to maintain the sequential chain of xlogs required for recovery
505 * 2) because the oldest ones will sooner become candidates for
506 * recycling at time of checkpoint
508 * NOTE: the "oldest" comparison will presently consider all segments of
509 * a timeline with a smaller ID to be older than all segments of a timeline
510 * with a larger ID; the net result being that past timelines are given
511 * higher priority for archiving. This seems okay, or at least not
512 * obviously worth changing.
/*
 * Find the next file to archive by scanning XLOGDIR/archive_status.
 *
 * An entry qualifies when, with its last 6 characters removed, the remaining
 * length lies within MIN_XFN_CHARS..MAX_XFN_CHARS, at least that many
 * leading characters belong to VALID_XFN_CHARS, and the removed suffix is
 * .ready.  A later qualifying entry replaces the remembered one when it
 * compares lower under strcmp().  The remembered name, with the suffix cut
 * off, is copied into xlog.
 *
 * xlog must have room for MAX_XFN_CHARS + 1 bytes; the buffer passed by
 * pgarch_ArchiverCopyLoop has that size.
 *
 * NOTE(review): the declarations of rldir and rlde, the handling of the
 * first match versus later ones, the directory close and the return
 * statements are not present in this view.  The caller uses the result as a
 * loop condition.
 */
515 pgarch_readyXlog(char *xlog)
518 * open xlog status directory and read through list of xlogs that have the
519 * .ready suffix, looking for earliest file. It is possible to optimise
520 * this code, though only a single file is expected on the vast majority
523 char XLogArchiveStatusDir[MAXPGPATH];
/* Longest accepted base name, the 6-character suffix, and the terminator. */
524 char newxlog[MAX_XFN_CHARS + 6 + 1];
529 snprintf(XLogArchiveStatusDir, MAXPGPATH, XLOGDIR "/archive_status");
530 rldir = AllocateDir(XLogArchiveStatusDir);
533 (errcode_for_file_access(),
534 errmsg("could not open archive status directory \"%s\": %m",
535 XLogArchiveStatusDir)));
537 while ((rlde = ReadDir(rldir, XLogArchiveStatusDir)) != NULL)
/*
 * Length of the entry name without its 6-character suffix.  It is negative
 * for names shorter than 6 characters; the first test below rejects those
 * before the value is used as an offset.
 */
539 int basenamelen = (int) strlen(rlde->d_name) - 6;
541 if (basenamelen >= MIN_XFN_CHARS &&
542 basenamelen <= MAX_XFN_CHARS &&
543 strspn(rlde->d_name, VALID_XFN_CHARS) >= basenamelen &&
544 strcmp(rlde->d_name + basenamelen, ".ready") == 0)
548 strcpy(newxlog, rlde->d_name);
553 if (strcmp(rlde->d_name, newxlog) < 0)
554 strcpy(newxlog, rlde->d_name);
562 /* truncate off the .ready */
563 newxlog[strlen(newxlog) - 6] = '\0';
564 strcpy(xlog, newxlog);
572 * Emit notification that an xlog file has been successfully archived.
573 * We do this by renaming the status file from NNN.ready to NNN.done.
574 * Eventually, a checkpoint process will notice this and delete both the
575 * NNN.done file and the xlog file itself.
/*
 * Record that xlog has been archived by renaming its status file from the
 * .ready name to the .done name.  Both paths are built with StatusFilePath.
 *
 * A rename() result below zero leads to a report that names both paths and
 * includes the system error text.  NOTE(review): the reporting call and its
 * severity level are not present in this view.
 */
578 pgarch_archiveDone(char *xlog)
580 char rlogready[MAXPGPATH];
581 char rlogdone[MAXPGPATH];
583 StatusFilePath(rlogready, xlog, ".ready");
584 StatusFilePath(rlogdone, xlog, ".done");
585 if (rename(rlogready, rlogdone) < 0)
587 (errcode_for_file_access(),
588 errmsg("could not rename file \"%s\" to \"%s\": %m",
589 rlogready, rlogdone)));