/*
 * contrib/pg_standby/pg_standby.c
 *
 * Production-ready example of how to create a Warm Standby
 * database server using continuous archiving as a
 * replication mechanism
 *
 * We separate the parameters for archive and nextWALfile
 * so that we can check the archive exists, even if the
 * WAL file doesn't (yet).
 *
 * This program will be executed once in full for each file
 * requested by the warm standby server.
 *
 * It is designed to cater to a variety of needs, as well as
 * providing a customizable section.
 *
 * Original author:		Simon Riggs  simon@2ndquadrant.com
 * Current maintainer:	Simon Riggs
 */
#include "postgres_fe.h"

#include <dirent.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <signal.h>

#ifdef WIN32
int			getopt(int argc, char *const argv[], const char *optstring);
#else
#include <unistd.h>

#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif
#endif   /* ! WIN32 */

extern char *optarg;
extern int	optind;
/* Program name as reported in messages; assigned at the start of main() */
const char *progname;

/* Options and defaults */
int			sleeptime = 5;		/* amount of time to sleep between file checks */
int			waittime = -1;		/* how long we have been waiting, -1 no wait
								 * yet */
int			maxwaittime = 0;	/* how long are we prepared to wait for? */
int			keepfiles = 0;		/* number of WAL files to keep, 0 keep all */
int			maxretries = 3;		/* number of retries on restore command */
bool		debug = false;		/* are we debugging? */
bool		need_cleanup = false;		/* do we need to remove files from
										 * archive? */

#ifndef WIN32
/* Set from a signal handler, hence volatile sig_atomic_t */
static volatile sig_atomic_t signaled = false;
#endif
63 char *archiveLocation; /* where to find the archive? */
64 char *triggerPath; /* where to find the trigger file? */
65 char *xlogFilePath; /* where we are going to restore to */
66 char *nextWALFileName; /* the file we need to get from archive */
67 char *restartWALFileName; /* the file from which we can restart restore */
68 char *priorWALFileName; /* the file we need to get from archive */
69 char WALFilePath[MAXPGPATH]; /* the file path including archive */
70 char restoreCommand[MAXPGPATH]; /* run this to restore */
71 char exclusiveCleanupFileName[MAXPGPATH]; /* the file we need to
/*
 * Two types of failover are supported (smart and fast failover).
 *
 * The content of the trigger file determines the type of failover. If the
 * trigger file contains the word "smart" (or the file is empty), smart
 * failover is chosen: pg_standby acts as cp or ln command itself, on
 * successful completion all the available WAL records will be applied
 * resulting in zero data loss. But, it might take a long time to finish
 * recovery if there's a lot of unapplied WAL.
 *
 * On the other hand, if the trigger file contains the word "fast", the
 * recovery is finished immediately even if unapplied WAL files remain. Any
 * transactions in the unapplied WAL files are lost.
 *
 * An empty trigger file performs smart failover. SIGUSR1 or SIGINT triggers
 * fast failover. A timeout causes fast failover (smart failover would have
 * the same effect, since if the timeout is reached there is no unapplied WAL).
 */
/* Failover state, set from the trigger file, a signal, or a timeout */
#define NoFailover		0
#define SmartFailover	1
#define FastFailover	2

static int	Failover = NoFailover;

/* How the requested file is fetched from the archive */
#define RESTORE_COMMAND_COPY 0
#define RESTORE_COMMAND_LINK 1
int			restoreCommandType;

/* Kind of file currently being requested */
#define XLOG_DATA			 0
#define XLOG_HISTORY		 1
#define XLOG_BACKUP_LABEL	 2
int			nextWALFileType;

/*
 * Build restoreCommand as: cmd "arg1" "arg2".  cmd must be a string literal
 * because it is concatenated with the format string at compile time.
 */
#define SET_RESTORE_COMMAND(cmd, arg1, arg2) \
	snprintf(restoreCommand, MAXPGPATH, cmd " \"%s\" \"%s\"", arg1, arg2)

/* Shared scratch buffer for the stat() calls in this file */
struct stat stat_buf;
/* =====================================================================
 *
 *		  Customizable section
 *
 * =====================================================================
 *
 *	Currently, this section assumes that the Archive is a locally
 *	accessible directory. If you want to make other assumptions,
 *	such as using a vendor-specific archive and access API, these
 *	routines are the ones you'll need to change. You're
 *	encouraged to submit any changes to pgsql-hackers@postgresql.org
 *	or personally to the current maintainer. Those changes may be
 *	folded in to later versions of this program.
 */
/* Length of a WAL data file name: three 8-digit hexadecimal fields */
#define XLOG_DATA_FNAME_LEN		24

/*
 * Reworked from access/xlog_internal.h
 *
 * Formats timeline, log and segment numbers as a WAL file name into fname,
 * which must have room for XLOG_DATA_FNAME_LEN + 1 bytes.  The numeric
 * arguments must be unsigned ints to match the %X conversions.
 */
#define XLogFileName(fname, tli, log, seg)	\
	snprintf((fname), XLOG_DATA_FNAME_LEN + 1, "%08X%08X%08X", \
			 (tli), (log), (seg))
133 * Initialize allows customized commands into the warm standby program.
135 * As an example, and probably the common case, we use either
136 * cp/ln commands on *nix, or copy/move command on Windows.
139 CustomizableInitialize(void)
142 snprintf(WALFilePath, MAXPGPATH, "%s\\%s", archiveLocation, nextWALFileName);
143 switch (restoreCommandType)
145 case RESTORE_COMMAND_LINK:
146 SET_RESTORE_COMMAND("mklink", WALFilePath, xlogFilePath);
148 case RESTORE_COMMAND_COPY:
150 SET_RESTORE_COMMAND("copy", WALFilePath, xlogFilePath);
154 snprintf(WALFilePath, MAXPGPATH, "%s/%s", archiveLocation, nextWALFileName);
155 switch (restoreCommandType)
157 case RESTORE_COMMAND_LINK:
158 #if HAVE_WORKING_LINK
159 SET_RESTORE_COMMAND("ln -s -f", WALFilePath, xlogFilePath);
162 case RESTORE_COMMAND_COPY:
164 SET_RESTORE_COMMAND("cp", WALFilePath, xlogFilePath);
170 * This code assumes that archiveLocation is a directory You may wish to
171 * add code to check for tape libraries, etc.. So, since it is a
172 * directory, we use stat to test if it's accessible
174 if (stat(archiveLocation, &stat_buf) != 0)
176 fprintf(stderr, "%s: archive location \"%s\" does not exist\n", progname, archiveLocation);
183 * CustomizableNextWALFileReady()
185 * Is the requested file ready yet?
188 CustomizableNextWALFileReady()
190 if (stat(WALFilePath, &stat_buf) == 0)
193 * If it's a backup file, return immediately. If it's a regular file
194 * return only if it's the right size already.
196 if (strlen(nextWALFileName) > 24 &&
197 strspn(nextWALFileName, "0123456789ABCDEF") == 24 &&
198 strcmp(nextWALFileName + strlen(nextWALFileName) - strlen(".backup"),
201 nextWALFileType = XLOG_BACKUP_LABEL;
204 else if (stat_buf.st_size == XLOG_SEG_SIZE)
209 * Windows 'cp' sets the final file size before the copy is
210 * complete, and not yet ready to be opened by pg_standby. So we
211 * wait for sleeptime secs before attempting to restore. If that
212 * is not enough, we will rely on the retry/holdoff mechanism.
213 * GNUWin32's cp does not have this problem.
215 pg_usleep(sleeptime * 1000000L);
217 nextWALFileType = XLOG_DATA;
222 * If still too small, wait until it is the correct size
224 if (stat_buf.st_size > XLOG_SEG_SIZE)
228 fprintf(stderr, "file size greater than expected\n");
238 #define MaxSegmentsPerLogFile ( 0xFFFFFFFF / XLOG_SEG_SIZE )
241 CustomizableCleanupPriorWALFiles(void)
244 * Work out name of prior file from current filename
246 if (nextWALFileType == XLOG_DATA)
253 * Assume it's OK to keep failing. The failure situation may change
254 * over time, so we'd rather keep going on the main processing than
255 * fail because we couldn't clean up yet.
257 if ((xldir = opendir(archiveLocation)) != NULL)
259 while ((xlde = readdir(xldir)) != NULL)
262 * We ignore the timeline part of the XLOG segment identifiers
263 * in deciding whether a segment is still needed. This
264 * ensures that we won't prematurely remove a segment from a
265 * parent timeline. We could probably be a little more
266 * proactive about removing segments of non-parent timelines,
267 * but that would be a whole lot more complicated.
269 * We use the alphanumeric sorting property of the filenames
270 * to decide which ones are earlier than the
271 * exclusiveCleanupFileName file. Note that this means files
272 * are not removed in the order they were originally written,
273 * in case this worries you.
275 if (strlen(xlde->d_name) == XLOG_DATA_FNAME_LEN &&
276 strspn(xlde->d_name, "0123456789ABCDEF") == XLOG_DATA_FNAME_LEN &&
277 strcmp(xlde->d_name + 8, exclusiveCleanupFileName + 8) < 0)
280 snprintf(WALFilePath, MAXPGPATH, "%s\\%s", archiveLocation, xlde->d_name);
282 snprintf(WALFilePath, MAXPGPATH, "%s/%s", archiveLocation, xlde->d_name);
286 fprintf(stderr, "\nremoving file \"%s\"", WALFilePath);
288 rc = unlink(WALFilePath);
291 fprintf(stderr, "\n%s: ERROR: could not remove file \"%s\": %s\n",
292 progname, WALFilePath, strerror(errno));
298 fprintf(stderr, "\n");
301 fprintf(stderr, "%s: could not open archive location \"%s\": %s\n",
302 progname, archiveLocation, strerror(errno));
/* =====================================================================
 *		  End of Customizable section
 * =====================================================================
 */
315 * SetWALFileNameForCleanup()
317 * Set the earliest WAL filename that we want to keep on the archive
318 * and decide whether we need_cleanup
321 SetWALFileNameForCleanup(void)
328 bool cleanup = false;
330 if (restartWALFileName)
333 * Don't do cleanup if the restartWALFileName provided is later than
334 * the xlog file requested. This is an error and we must not remove
335 * these files from archive. This shouldn't happen, but better safe
338 if (strcmp(restartWALFileName, nextWALFileName) > 0)
341 strcpy(exclusiveCleanupFileName, restartWALFileName);
347 sscanf(nextWALFileName, "%08X%08X%08X", &tli, &log, &seg);
348 if (tli > 0 && log >= 0 && seg > 0)
350 log_diff = keepfiles / MaxSegmentsPerLogFile;
351 seg_diff = keepfiles % MaxSegmentsPerLogFile;
355 seg = MaxSegmentsPerLogFile - (seg_diff - seg);
373 XLogFileName(exclusiveCleanupFileName, tli, log, seg);
379 * CheckForExternalTrigger()
381 * Is there a trigger file? Sets global 'Failover' variable to indicate
382 * what kind of a trigger file it was. A "fast" trigger file is turned
383 * into a "smart" file as a side-effect.
386 CheckForExternalTrigger(void)
393 * Look for a trigger file, if that option has been selected
395 * We use stat() here because triggerPath is always a file rather than
396 * potentially being in an archive
398 if (!triggerPath || stat(triggerPath, &stat_buf) != 0)
402 * An empty trigger file performs smart failover. There's a little race
403 * condition here: if the writer of the trigger file has just created the
404 * file, but not yet written anything to it, we'll treat that as smart
405 * shutdown even if the other process was just about to write "fast" to
406 * it. But that's fine: we'll restore one more WAL file, and when we're
407 * invoked next time, we'll see the word "fast" and fail over immediately.
409 if (stat_buf.st_size == 0)
411 Failover = SmartFailover;
412 fprintf(stderr, "trigger file found: smart failover\n");
417 if ((fd = open(triggerPath, O_RDWR, 0)) < 0)
419 fprintf(stderr, "WARNING: could not open \"%s\": %s\n",
420 triggerPath, strerror(errno));
425 if ((len = read(fd, buf, sizeof(buf))) < 0)
427 fprintf(stderr, "WARNING: could not read \"%s\": %s\n",
428 triggerPath, strerror(errno));
435 if (strncmp(buf, "smart", 5) == 0)
437 Failover = SmartFailover;
438 fprintf(stderr, "trigger file found: smart failover\n");
444 if (strncmp(buf, "fast", 4) == 0)
446 Failover = FastFailover;
448 fprintf(stderr, "trigger file found: fast failover\n");
452 * Turn it into a "smart" trigger by truncating the file. Otherwise if
453 * the server asks us again to restore a segment that was restored
454 * already, we would return "not found" and upset the server.
456 if (ftruncate(fd, 0) < 0)
458 fprintf(stderr, "WARNING: could not read \"%s\": %s\n",
459 triggerPath, strerror(errno));
468 fprintf(stderr, "WARNING: invalid content in \"%s\"\n", triggerPath);
474 * RestoreWALFileForRecovery()
476 * Perform the action required to restore the file from archive
479 RestoreWALFileForRecovery(void)
486 fprintf(stderr, "running restore :");
490 while (numretries <= maxretries)
492 rc = system(restoreCommand);
497 fprintf(stderr, " OK\n");
502 pg_usleep(numretries++ * sleeptime * 1000000L);
506 * Allow caller to add additional info
509 fprintf(stderr, "not restored\n");
516 printf("%s allows PostgreSQL warm standby servers to be configured.\n\n", progname);
518 printf(" %s [OPTION]... ARCHIVELOCATION NEXTWALFILE XLOGFILEPATH [RESTARTWALFILE]\n", progname);
519 printf("\nOptions:\n");
520 printf(" -c copy file from archive (default)\n");
521 printf(" -d generate lots of debugging output (testing only)\n");
522 printf(" -k NUMFILESTOKEEP if RESTARTWALFILE is not used, remove files prior to limit\n"
524 printf(" -l does nothing; use of link is now deprecated\n");
525 printf(" -r MAXRETRIES max number of times to retry, with progressive wait\n"
527 printf(" -s SLEEPTIME seconds to wait between file checks (min=1, max=60,\n"
529 printf(" -t TRIGGERFILE trigger file to initiate failover (no default)\n");
530 printf(" -w MAXWAITTIME max seconds to wait for a file (0=no limit) (default=0)\n");
531 printf(" --help show this help, then exit\n");
532 printf(" --version output version information, then exit\n");
534 "Main intended use as restore_command in recovery.conf:\n"
535 " restore_command = 'pg_standby [OPTION]... ARCHIVELOCATION %%f %%p %%r'\n"
537 " restore_command = 'pg_standby /mnt/server/archiverdir %%f %%p %%r'\n");
538 printf("\nReport bugs to <pgsql-bugs@postgresql.org>.\n");
548 /* We don't want SIGQUIT to core dump */
550 sigquit_handler(int sig)
552 signal(SIGINT, SIG_DFL);
553 kill(getpid(), SIGINT);
557 /*------------ MAIN ----------------------------------------*/
559 main(int argc, char **argv)
563 progname = get_progname(argv[0]);
567 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
572 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
574 puts("pg_standby (PostgreSQL) " PG_VERSION);
582 * You can send SIGUSR1 to trigger failover.
584 * Postmaster uses SIGQUIT to request immediate shutdown. The default
585 * action is to core dump, but we don't want that, so trap it and commit
586 * suicide without core dump.
588 * We used to use SIGINT and SIGQUIT to trigger failover, but that turned
589 * out to be a bad idea because postmaster uses SIGQUIT to request
590 * immediate shutdown. We still trap SIGINT, but that may change in a
593 * There's no way to trigger failover via signal on Windows.
595 (void) signal(SIGUSR1, sighandler);
596 (void) signal(SIGINT, sighandler); /* deprecated, use SIGUSR1 */
597 (void) signal(SIGQUIT, sigquit_handler);
600 while ((c = getopt(argc, argv, "cdk:lr:s:t:w:")) != -1)
604 case 'c': /* Use copy */
605 restoreCommandType = RESTORE_COMMAND_COPY;
607 case 'd': /* Debug mode */
610 case 'k': /* keepfiles */
611 keepfiles = atoi(optarg);
614 fprintf(stderr, "%s: -k keepfiles must be >= 0\n", progname);
618 case 'l': /* Use link */
621 * Link feature disabled, possibly permanently. Linking causes
622 * a problem after recovery ends that is not currently
623 * resolved by PostgreSQL. 25 Jun 2009
626 restoreCommandType = RESTORE_COMMAND_LINK;
629 case 'r': /* Retries */
630 maxretries = atoi(optarg);
633 fprintf(stderr, "%s: -r maxretries must be >= 0\n", progname);
637 case 's': /* Sleep time */
638 sleeptime = atoi(optarg);
639 if (sleeptime <= 0 || sleeptime > 60)
641 fprintf(stderr, "%s: -s sleeptime incorrectly set\n", progname);
645 case 't': /* Trigger file */
646 triggerPath = optarg;
648 case 'w': /* Max wait time */
649 maxwaittime = atoi(optarg);
652 fprintf(stderr, "%s: -w maxwaittime incorrectly set\n", progname);
657 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
664 * Parameter checking - after checking to see if trigger file present
668 fprintf(stderr, "%s: not enough command-line arguments\n", progname);
673 * We will go to the archiveLocation to get nextWALFileName.
674 * nextWALFileName may not exist yet, which would not be an error, so we
675 * separate the archiveLocation and nextWALFileName so we can check
676 * separately whether archiveLocation exists, if not that is an error
680 archiveLocation = argv[optind];
685 fprintf(stderr, "%s: must specify archive location\n", progname);
686 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
692 nextWALFileName = argv[optind];
697 fprintf(stderr, "%s: must specify WAL file name as second non-option argument (use \"%%f\")\n", progname);
698 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
704 xlogFilePath = argv[optind];
709 fprintf(stderr, "%s: must specify xlog destination as third non-option argument (use \"%%p\")\n", progname);
710 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
716 restartWALFileName = argv[optind];
720 CustomizableInitialize();
722 need_cleanup = SetWALFileNameForCleanup();
726 fprintf(stderr, "Trigger file: %s\n", triggerPath ? triggerPath : "<not set>");
727 fprintf(stderr, "Waiting for WAL file: %s\n", nextWALFileName);
728 fprintf(stderr, "WAL file path: %s\n", WALFilePath);
729 fprintf(stderr, "Restoring to: %s\n", xlogFilePath);
730 fprintf(stderr, "Sleep interval: %d second%s\n",
731 sleeptime, (sleeptime > 1 ? "s" : " "));
732 fprintf(stderr, "Max wait interval: %d %s\n",
733 maxwaittime, (maxwaittime > 0 ? "seconds" : "forever"));
734 fprintf(stderr, "Command for restore: %s\n", restoreCommand);
735 fprintf(stderr, "Keep archive history: ");
737 fprintf(stderr, "%s and later\n", exclusiveCleanupFileName);
739 fprintf(stderr, "no cleanup required\n");
744 * Check for initial history file: always the first file to be requested
745 * It's OK if the file isn't there - all other files need to wait
747 if (strlen(nextWALFileName) > 8 &&
748 strspn(nextWALFileName, "0123456789ABCDEF") == 8 &&
749 strcmp(nextWALFileName + strlen(nextWALFileName) - strlen(".history"),
752 nextWALFileType = XLOG_HISTORY;
753 if (RestoreWALFileForRecovery())
759 fprintf(stderr, "history file not found\n");
771 /* Check for trigger file or signal first */
772 CheckForExternalTrigger();
776 Failover = FastFailover;
779 fprintf(stderr, "signaled to exit: fast failover\n");
786 * Check for fast failover immediately, before checking if the
787 * requested WAL file is available
789 if (Failover == FastFailover)
792 if (CustomizableNextWALFileReady())
795 * Once we have restored this file successfully we can remove some
796 * prior WAL files. If this restore fails we musn't remove any
797 * file because some of them will be requested again immediately
798 * after the failed restore, or when we restart recovery.
800 if (RestoreWALFileForRecovery())
803 CustomizableCleanupPriorWALFiles();
809 /* Something went wrong in copying the file */
814 /* Check for smart failover if the next WAL file was not available */
815 if (Failover == SmartFailover)
819 pg_usleep(sleeptime * 1000000L);
821 waittime += sleeptime;
822 if (waittime >= maxwaittime && maxwaittime > 0)
824 Failover = FastFailover;
827 fprintf(stderr, "Timed out after %d seconds: fast failover\n",
834 fprintf(stderr, "WAL file not present yet.");
836 fprintf(stderr, " Checking for trigger file...");
837 fprintf(stderr, "\n");