]> granicus.if.org Git - postgresql/blob - src/backend/postmaster/pgarch.c
Update CVS HEAD for 2007 copyright. Back branches are typically not
[postgresql] / src / backend / postmaster / pgarch.c
1 /*-------------------------------------------------------------------------
2  *
3  * pgarch.c
4  *
5  *      PostgreSQL WAL archiver
6  *
7  *      All functions relating to archiver are included here
8  *
9  *      - All functions executed by archiver process
10  *
11  *      - archiver is forked from postmaster, and the two
12  *      processes then communicate using signals. All functions
13  *      executed by postmaster are included in this file.
14  *
15  *      Initial author: Simon Riggs             simon@2ndquadrant.com
16  *
17  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
18  * Portions Copyright (c) 1994, Regents of the University of California
19  *
20  *
21  * IDENTIFICATION
22  *        $PostgreSQL: pgsql/src/backend/postmaster/pgarch.c,v 1.28 2007/01/05 22:19:36 momjian Exp $
23  *
24  *-------------------------------------------------------------------------
25  */
26 #include "postgres.h"
27
28 #include <fcntl.h>
29 #include <signal.h>
30 #include <time.h>
31 #include <sys/time.h>
32 #include <sys/wait.h>
33 #include <unistd.h>
34
35 #include "access/xlog_internal.h"
36 #include "libpq/pqsignal.h"
37 #include "miscadmin.h"
38 #include "postmaster/fork_process.h"
39 #include "postmaster/pgarch.h"
40 #include "postmaster/postmaster.h"
41 #include "storage/fd.h"
42 #include "storage/ipc.h"
43 #include "storage/pg_shmem.h"
44 #include "storage/pmsignal.h"
45 #include "utils/guc.h"
46 #include "utils/ps_status.h"
47
48
/* ----------
 * Timer definitions.
 *
 * Both intervals are expressed in seconds and are compared against
 * differences of time(NULL) values.
 * ----------
 */
#define PGARCH_AUTOWAKE_INTERVAL 60             /* Longest the main loop waits
                                                 * after a copy cycle before it
                                                 * wakes itself up and polls the
                                                 * archive status directory
                                                 * again; in seconds. */
#define PGARCH_RESTART_INTERVAL 10              /* Minimum spacing between
                                                 * attempts by pgarch_start to
                                                 * launch an archiver; in
                                                 * seconds. */

/* ----------
 * Archiver control info.
 *
 * We expect that archivable files within pg_xlog will have names between
 * MIN_XFN_CHARS and MAX_XFN_CHARS in length, consisting only of characters
 * appearing in VALID_XFN_CHARS.  The status files in archive_status have
 * corresponding names with ".ready" or ".done" appended.
 *
 * Note that VALID_XFN_CHARS is handed to strspn() in pgarch_readyXlog, so it
 * is a set of permitted characters, not a pattern: the hex digits, '.', and
 * each individual letter of "history" and "backup" are accepted in any order.
 * ----------
 */
#define MIN_XFN_CHARS   16
#define MAX_XFN_CHARS   40
#define VALID_XFN_CHARS "0123456789ABCDEF.history.backup"

/*
 * Number of failed attempts on a single file after which
 * pgarch_ArchiverCopyLoop emits a WARNING and abandons the current cycle.
 */
#define NUM_ARCHIVE_RETRIES 3


/* ----------
 * Local data
 * ----------
 */

/*
 * time(NULL) value recorded by pgarch_start when it last went ahead with a
 * launch attempt; used there to enforce PGARCH_RESTART_INTERVAL.
 */
static time_t last_pgarch_start_time;

/*
 * Flags set by interrupt handlers for later service in the main loop.
 *
 * got_SIGHUP is set by ArchSigHupHandler; wakened is set by pgarch_waken and
 * also by pgarch_MainLoop itself (on entry and on autowake timeout).
 */
static volatile sig_atomic_t got_SIGHUP = false;
static volatile sig_atomic_t wakened = false;
86
87 /* ----------
88  * Local function forward declarations
89  * ----------
90  */
91 #ifdef EXEC_BACKEND
92 static pid_t pgarch_forkexec(void);
93 #endif
94
95 NON_EXEC_STATIC void PgArchiverMain(int argc, char *argv[]);
96 static void pgarch_exit(SIGNAL_ARGS);
97 static void ArchSigHupHandler(SIGNAL_ARGS);
98 static void pgarch_waken(SIGNAL_ARGS);
99 static void pgarch_MainLoop(void);
100 static void pgarch_ArchiverCopyLoop(void);
101 static bool pgarch_archiveXlog(char *xlog);
102 static bool pgarch_readyXlog(char *xlog);
103 static void pgarch_archiveDone(char *xlog);
104
105
106 /* ------------------------------------------------------------
107  * Public functions called from postmaster follow
108  * ------------------------------------------------------------
109  */
110
111 /*
112  * pgarch_start
113  *
114  *      Called from postmaster at startup or after an existing archiver
115  *      died.  Attempt to fire up a fresh archiver process.
116  *
117  *      Returns PID of child process, or 0 if fail.
118  *
119  *      Note: if fail, we will be called again from the postmaster main loop.
120  */
121 int
122 pgarch_start(void)
123 {
124         time_t          curtime;
125         pid_t           pgArchPid;
126
127         /*
128          * Do nothing if no archiver needed
129          */
130         if (!XLogArchivingActive())
131                 return 0;
132
133         /*
134          * Do nothing if too soon since last archiver start.  This is a safety
135          * valve to protect against continuous respawn attempts if the archiver is
136          * dying immediately at launch. Note that since we will be re-called from
137          * the postmaster main loop, we will get another chance later.
138          */
139         curtime = time(NULL);
140         if ((unsigned int) (curtime - last_pgarch_start_time) <
141                 (unsigned int) PGARCH_RESTART_INTERVAL)
142                 return 0;
143         last_pgarch_start_time = curtime;
144
145 #ifdef EXEC_BACKEND
146         switch ((pgArchPid = pgarch_forkexec()))
147 #else
148         switch ((pgArchPid = fork_process()))
149 #endif
150         {
151                 case -1:
152                         ereport(LOG,
153                                         (errmsg("could not fork archiver: %m")));
154                         return 0;
155
156 #ifndef EXEC_BACKEND
157                 case 0:
158                         /* in postmaster child ... */
159                         /* Close the postmaster's sockets */
160                         ClosePostmasterPorts(false);
161
162                         /* Lose the postmaster's on-exit routines */
163                         on_exit_reset();
164
165                         /* Drop our connection to postmaster's shared memory, as well */
166                         PGSharedMemoryDetach();
167
168                         PgArchiverMain(0, NULL);
169                         break;
170 #endif
171
172                 default:
173                         return (int) pgArchPid;
174         }
175
176         /* shouldn't get here */
177         return 0;
178 }
179
180 /* ------------------------------------------------------------
181  * Local functions called by archiver follow
182  * ------------------------------------------------------------
183  */
184
185
186 #ifdef EXEC_BACKEND
187
/*
 * pgarch_forkexec() -
 *
 * Assemble the argument vector for the archiver process and hand it to
 * postmaster_forkexec, returning whatever that call returns.
 */
static pid_t
pgarch_forkexec(void)
{
        char       *args[10];
        int         nargs = 0;

        args[nargs++] = "postgres";
        args[nargs++] = "--forkarch";
        args[nargs++] = NULL;           /* filled in by postmaster_forkexec */

        /* Terminate the vector; the terminator is not counted in nargs */
        args[nargs] = NULL;
        Assert(nargs < lengthof(args));

        return postmaster_forkexec(nargs, args);
}
210 #endif   /* EXEC_BACKEND */
211
212
/*
 * PgArchiverMain
 *
 *      Body of the archiver process: mark ourselves as a postmaster child,
 *      install the archiver's signal dispositions, set the ps display, run
 *      pgarch_MainLoop, and exit with status 0 once the loop returns.
 *
 *      The argc/argv parameters are valid only in EXEC_BACKEND case.  However,
 *      since we don't use 'em, it hardly matters...
 */
NON_EXEC_STATIC void
PgArchiverMain(int argc, char *argv[])
{
        IsUnderPostmaster = true;       /* we are a postmaster subprocess now */

        MyProcPid = getpid();           /* reset MyProcPid */

        /*
         * If possible, make this process a group leader, so that the postmaster
         * can signal any child processes too.
         */
#ifdef HAVE_SETSID
        if (setsid() < 0)
                elog(FATAL, "setsid() failed: %m");
#endif

        /*
         * Ignore all signals usually bound to some action in the postmaster,
         * except for SIGHUP, SIGUSR1 and SIGQUIT:
         *   SIGHUP  -> ArchSigHupHandler (request a config reload)
         *   SIGUSR1 -> pgarch_waken      (request an archive cycle)
         *   SIGQUIT -> pgarch_exit       (terminate immediately)
         * All handlers are installed before signals are unblocked below.
         */
        pqsignal(SIGHUP, ArchSigHupHandler);
        pqsignal(SIGINT, SIG_IGN);
        pqsignal(SIGTERM, SIG_IGN);
        pqsignal(SIGQUIT, pgarch_exit);
        pqsignal(SIGALRM, SIG_IGN);
        pqsignal(SIGPIPE, SIG_IGN);
        pqsignal(SIGUSR1, pgarch_waken);
        pqsignal(SIGUSR2, SIG_IGN);
        /* The remaining signals get their default dispositions */
        pqsignal(SIGCHLD, SIG_DFL);
        pqsignal(SIGTTIN, SIG_DFL);
        pqsignal(SIGTTOU, SIG_DFL);
        pqsignal(SIGCONT, SIG_DFL);
        pqsignal(SIGWINCH, SIG_DFL);
        PG_SETMASK(&UnBlockSig);

        /*
         * Identify myself via ps
         */
        init_ps_display("archiver process", "", "", "");

        pgarch_MainLoop();

        /* Main loop returned: nothing left to do */
        exit(0);
}
263
/*
 * pgarch_exit
 *
 * SIGQUIT signal handler for archiver process: terminate at once with exit
 * status 0.
 */
static void
pgarch_exit(SIGNAL_ARGS)
{
        /*
         * For now, we just nail the doors shut and get out of town.  It might
         * seem cleaner to finish up any pending archive copies, but there's a
         * nontrivial risk that init will kill us partway through.
         *
         * Use _exit() rather than exit(): we are inside a signal handler, and
         * exit() is not async-signal-safe (it runs atexit callbacks and
         * flushes stdio buffers, either of which the interrupted code may
         * have been in the middle of touching).  _exit() terminates the
         * process directly, which is what "nail the doors shut" calls for.
         */
        _exit(0);
}
275
/*
 * SIGHUP: set flag to re-read config file at next convenient time.
 *
 * The handler only records the request; pgarch_MainLoop notices the flag,
 * clears it, and performs the actual reload.
 */
static void
ArchSigHupHandler(SIGNAL_ARGS)
{
        got_SIGHUP = true;
}
282
/*
 * SIGUSR1 signal handler for archiver process.
 *
 * Only sets the "wakened" flag; pgarch_MainLoop sees it, clears it, and runs
 * an archive copy cycle.
 */
static void
pgarch_waken(SIGNAL_ARGS)
{
        wakened = true;
}
289
290 /*
291  * pgarch_MainLoop
292  *
293  * Main loop for archiver
294  */
295 static void
296 pgarch_MainLoop(void)
297 {
298         time_t          last_copy_time = 0;
299
300         /*
301          * We run the copy loop immediately upon entry, in case there are
302          * unarchived files left over from a previous database run (or maybe the
303          * archiver died unexpectedly).  After that we wait for a signal or
304          * timeout before doing more.
305          */
306         wakened = true;
307
308         do
309         {
310                 /* Check for config update */
311                 if (got_SIGHUP)
312                 {
313                         got_SIGHUP = false;
314                         ProcessConfigFile(PGC_SIGHUP);
315                         if (!XLogArchivingActive())
316                                 break;                  /* user wants us to shut down */
317                 }
318
319                 /* Do what we're here for */
320                 if (wakened)
321                 {
322                         wakened = false;
323                         pgarch_ArchiverCopyLoop();
324                         last_copy_time = time(NULL);
325                 }
326
327                 /*
328                  * There shouldn't be anything for the archiver to do except to wait
329                  * for a signal ... however, the archiver exists to protect our data,
330                  * so she wakes up occasionally to allow herself to be proactive.
331                  *
332                  * On some platforms, signals won't interrupt the sleep.  To ensure we
333                  * respond reasonably promptly when someone signals us, break down the
334                  * sleep into 1-second increments, and check for interrupts after each
335                  * nap.
336                  */
337                 while (!(wakened || got_SIGHUP))
338                 {
339                         time_t          curtime;
340
341                         pg_usleep(1000000L);
342                         curtime = time(NULL);
343                         if ((unsigned int) (curtime - last_copy_time) >=
344                                 (unsigned int) PGARCH_AUTOWAKE_INTERVAL)
345                                 wakened = true;
346                 }
347         } while (PostmasterIsAlive(true));
348 }
349
350 /*
351  * pgarch_ArchiverCopyLoop
352  *
353  * Archives all outstanding xlogs then returns
354  */
355 static void
356 pgarch_ArchiverCopyLoop(void)
357 {
358         char            xlog[MAX_XFN_CHARS + 1];
359
360         /*
361          * loop through all xlogs with archive_status of .ready and archive
362          * them...mostly we expect this to be a single file, though it is possible
363          * some backend will add files onto the list of those that need archiving
364          * while we are still copying earlier archives
365          */
366         while (pgarch_readyXlog(xlog))
367         {
368                 int                     failures = 0;
369
370                 for (;;)
371                 {
372                         /* Abandon processing if we notice our postmaster has died */
373                         if (!PostmasterIsAlive(true))
374                                 return;
375
376                         if (pgarch_archiveXlog(xlog))
377                         {
378                                 /* successful */
379                                 pgarch_archiveDone(xlog);
380                                 break;                  /* out of inner retry loop */
381                         }
382                         else
383                         {
384                                 if (++failures >= NUM_ARCHIVE_RETRIES)
385                                 {
386                                         ereport(WARNING,
387                                                         (errmsg("transaction log file \"%s\" could not be archived: too many failures",
388                                                                         xlog)));
389                                         return;         /* give up archiving for now */
390                                 }
391                                 pg_usleep(1000000L);    /* wait a bit before retrying */
392                         }
393                 }
394         }
395 }
396
397 /*
398  * pgarch_archiveXlog
399  *
400  * Invokes system(3) to copy one archive file to wherever it should go
401  *
402  * Returns true if successful
403  */
404 static bool
405 pgarch_archiveXlog(char *xlog)
406 {
407         char            xlogarchcmd[MAXPGPATH];
408         char            pathname[MAXPGPATH];
409         char       *dp;
410         char       *endp;
411         const char *sp;
412         int                     rc;
413
414         snprintf(pathname, MAXPGPATH, XLOGDIR "/%s", xlog);
415
416         /*
417          * construct the command to be executed
418          */
419         dp = xlogarchcmd;
420         endp = xlogarchcmd + MAXPGPATH - 1;
421         *endp = '\0';
422
423         for (sp = XLogArchiveCommand; *sp; sp++)
424         {
425                 if (*sp == '%')
426                 {
427                         switch (sp[1])
428                         {
429                                 case 'p':
430                                         /* %p: relative path of source file */
431                                         sp++;
432                                         StrNCpy(dp, pathname, endp - dp);
433                                         make_native_path(dp);
434                                         dp += strlen(dp);
435                                         break;
436                                 case 'f':
437                                         /* %f: filename of source file */
438                                         sp++;
439                                         StrNCpy(dp, xlog, endp - dp);
440                                         dp += strlen(dp);
441                                         break;
442                                 case '%':
443                                         /* convert %% to a single % */
444                                         sp++;
445                                         if (dp < endp)
446                                                 *dp++ = *sp;
447                                         break;
448                                 default:
449                                         /* otherwise treat the % as not special */
450                                         if (dp < endp)
451                                                 *dp++ = *sp;
452                                         break;
453                         }
454                 }
455                 else
456                 {
457                         if (dp < endp)
458                                 *dp++ = *sp;
459                 }
460         }
461         *dp = '\0';
462
463         ereport(DEBUG3,
464                         (errmsg_internal("executing archive command \"%s\"",
465                                                          xlogarchcmd)));
466         rc = system(xlogarchcmd);
467         if (rc != 0)
468         {
469                 /*
470                  * If either the shell itself, or a called command, died on a signal,
471                  * abort the archiver.  We do this because system() ignores SIGINT and
472                  * SIGQUIT while waiting; so a signal is very likely something that
473                  * should have interrupted us too.  If we overreact it's no big deal,
474                  * the postmaster will just start the archiver again.
475                  *
476                  * Per the Single Unix Spec, shells report exit status > 128 when
477                  * a called command died on a signal.
478                  */
479                 bool    signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 128;
480
481                 ereport(signaled ? FATAL : LOG,
482                                 (errmsg("archive command \"%s\" failed: return code %d",
483                                                 xlogarchcmd, rc)));
484
485                 return false;
486         }
487         ereport(LOG,
488                         (errmsg("archived transaction log file \"%s\"", xlog)));
489
490         return true;
491 }
492
493 /*
494  * pgarch_readyXlog
495  *
496  * Return name of the oldest xlog file that has not yet been archived.
497  * No notification is set that file archiving is now in progress, so
498  * this would need to be extended if multiple concurrent archival
499  * tasks were created. If a failure occurs, we will completely
500  * re-copy the file at the next available opportunity.
501  *
502  * It is important that we return the oldest, so that we archive xlogs
503  * in order that they were written, for two reasons:
504  * 1) to maintain the sequential chain of xlogs required for recovery
505  * 2) because the oldest ones will sooner become candidates for
506  * recycling at time of checkpoint
507  *
508  * NOTE: the "oldest" comparison will presently consider all segments of
509  * a timeline with a smaller ID to be older than all segments of a timeline
510  * with a larger ID; the net result being that past timelines are given
511  * higher priority for archiving.  This seems okay, or at least not
512  * obviously worth changing.
513  */
514 static bool
515 pgarch_readyXlog(char *xlog)
516 {
517         /*
518          * open xlog status directory and read through list of xlogs that have the
519          * .ready suffix, looking for earliest file. It is possible to optimise
520          * this code, though only a single file is expected on the vast majority
521          * of calls, so....
522          */
523         char            XLogArchiveStatusDir[MAXPGPATH];
524         char            newxlog[MAX_XFN_CHARS + 6 + 1];
525         DIR                *rldir;
526         struct dirent *rlde;
527         bool            found = false;
528
529         snprintf(XLogArchiveStatusDir, MAXPGPATH, XLOGDIR "/archive_status");
530         rldir = AllocateDir(XLogArchiveStatusDir);
531         if (rldir == NULL)
532                 ereport(ERROR,
533                                 (errcode_for_file_access(),
534                                  errmsg("could not open archive status directory \"%s\": %m",
535                                                 XLogArchiveStatusDir)));
536
537         while ((rlde = ReadDir(rldir, XLogArchiveStatusDir)) != NULL)
538         {
539                 int                     basenamelen = (int) strlen(rlde->d_name) - 6;
540
541                 if (basenamelen >= MIN_XFN_CHARS &&
542                         basenamelen <= MAX_XFN_CHARS &&
543                         strspn(rlde->d_name, VALID_XFN_CHARS) >= basenamelen &&
544                         strcmp(rlde->d_name + basenamelen, ".ready") == 0)
545                 {
546                         if (!found)
547                         {
548                                 strcpy(newxlog, rlde->d_name);
549                                 found = true;
550                         }
551                         else
552                         {
553                                 if (strcmp(rlde->d_name, newxlog) < 0)
554                                         strcpy(newxlog, rlde->d_name);
555                         }
556                 }
557         }
558         FreeDir(rldir);
559
560         if (found)
561         {
562                 /* truncate off the .ready */
563                 newxlog[strlen(newxlog) - 6] = '\0';
564                 strcpy(xlog, newxlog);
565         }
566         return found;
567 }
568
569 /*
570  * pgarch_archiveDone
571  *
572  * Emit notification that an xlog file has been successfully archived.
573  * We do this by renaming the status file from NNN.ready to NNN.done.
574  * Eventually, a checkpoint process will notice this and delete both the
575  * NNN.done file and the xlog file itself.
576  */
577 static void
578 pgarch_archiveDone(char *xlog)
579 {
580         char            rlogready[MAXPGPATH];
581         char            rlogdone[MAXPGPATH];
582
583         StatusFilePath(rlogready, xlog, ".ready");
584         StatusFilePath(rlogdone, xlog, ".done");
585         if (rename(rlogready, rlogdone) < 0)
586                 ereport(WARNING,
587                                 (errcode_for_file_access(),
588                                  errmsg("could not rename file \"%s\" to \"%s\": %m",
589                                                 rlogready, rlogdone)));
590 }