1 /*-------------------------------------------------------------------------
4 * code for taking a base backup and streaming it to a standby
6 * Portions Copyright (c) 2010-2014, PostgreSQL Global Development Group
9 * src/backend/replication/basebackup.c
11 *-------------------------------------------------------------------------
15 #include <sys/types.h>
20 #include "access/xlog_internal.h" /* for pg_start/stop_backup */
21 #include "catalog/catalog.h"
22 #include "catalog/pg_type.h"
23 #include "lib/stringinfo.h"
24 #include "libpq/libpq.h"
25 #include "libpq/pqformat.h"
26 #include "miscadmin.h"
27 #include "nodes/pg_list.h"
30 #include "replication/basebackup.h"
31 #include "replication/walsender.h"
32 #include "replication/walsender_private.h"
33 #include "storage/fd.h"
34 #include "storage/ipc.h"
35 #include "utils/builtins.h"
36 #include "utils/elog.h"
37 #include "utils/ps_status.h"
38 #include "utils/timestamp.h"
/* Prototypes for the file-local (static) helpers that are defined further down in this file. */
52 static int64 sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces);
53 static int64 sendTablespace(char *path, bool sizeonly);
54 static bool sendFile(char *readfilename, char *tarfilename,
55 struct stat * statbuf, bool missing_ok);
56 static void sendFileWithContent(const char *filename, const char *content);
57 static void _tarWriteHeader(const char *filename, const char *linktarget,
58 struct stat * statbuf);
59 static void send_int8_string(StringInfoData *buf, int64 intval);
60 static void SendBackupHeader(List *tablespaces);
61 static void base_backup_cleanup(int code, Datum arg);
62 static void perform_base_backup(basebackup_options *opt, DIR *tblspcdir);
63 static void parse_basebackup_options(List *options, basebackup_options *opt);
64 static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
65 static int compareWalFileNames(const void *a, const void *b);
66 static void throttle(size_t increment);
68 /* Was the backup currently in-progress initiated in recovery mode? */
/* Assigned from RecoveryInProgress() in perform_base_backup() and compared again in sendDir(). */
69 static bool backup_started_in_recovery = false;
71 /* Relative path of temporary statistics directory */
/* Assigned in perform_base_backup() in a "./"-prefixed form and compared against entry paths in sendDir(). */
72 static char *statrelpath = NULL;
75 * Size of each block sent into the tar stream for larger files.
/* Also the size of the stack buffers declared as buf[TAR_SEND_SIZE] in this file. */
77 #define TAR_SEND_SIZE 32768
80 * How frequently to throttle, as a fraction of the specified rate-second.
/* Divides both the per-second byte budget and USECS_PER_SEC in perform_base_backup(). */
82 #define THROTTLING_FREQUENCY 8
84 /* The actual number of bytes, transfer of which may cause sleep. */
85 static uint64 throttling_sample;
87 /* Amount of data already transferred but not yet throttled. */
/* A negative value (perform_base_backup() stores -1) makes throttle() take its early "counter < 0" branch. */
88 static int64 throttling_counter;
90 /* The minimum time required to transfer throttling_sample bytes. */
/* Set to USECS_PER_SEC / THROTTLING_FREQUENCY, i.e. expressed in microseconds. */
91 static int64 elapsed_min_unit;
93 /* The last check of the transfer rate. */
/* Holds a GetCurrentIntegerTimestamp() value; see perform_base_backup() and throttle(). */
94 static int64 throttled_last;
100 char *rpath; /* relative path within PGDATA, or NULL */
106 * Called when ERROR or FATAL happens in perform_base_backup() after
107 * we have started the backup - make sure we end it!
/*
 * Error-cleanup callback: perform_base_backup() registers it through
 * PG_ENSURE_ERROR_CLEANUP() with a zero Datum.  The visible body does not use
 * either argument; it only calls do_pg_abort_backup().
 */
110 base_backup_cleanup(int code, Datum arg)
112 do_pg_abort_backup();
116 * Actually do a base backup for the specified tablespaces.
118 * This is split out mainly to avoid complaints about "variable might be
119 * clobbered by longjmp" from stupider versions of gcc.
/*
 * perform_base_backup
 *
 * opt: parsed backup options; label, fastcheckpoint, progress, maxrate,
 * includewal and nowait are read in the visible code.
 * tblspcdir: open directory handle, read here as "pg_tblspc".
 *
 * Visible flow:
 *   1. do_pg_start_backup(), then the start position is sent with
 *      SendXlogRecPtrResult().
 *   2. One tablespaceinfo is built per entry of pg_tblspc (link target,
 *      optional path relative to DataDir, optional size), and a final
 *      zero-filled entry is appended for the main data directory; that entry
 *      is recognised later by its NULL path.
 *   3. SendBackupHeader(), then throttling is set up: a positive maxrate
 *      resets throttling_counter to 0, otherwise it is set to -1.
 *   4. One archive is streamed per list entry; for the main data directory
 *      this is backup_label, then sendDir("."), then the control file.
 *   5. do_pg_stop_backup(), then the WAL segments between the start and end
 *      segment numbers and every timeline history file in pg_xlog are sent,
 *      each followed by an empty ".done" status file, and finally the end
 *      position is sent.
 *
 * Sizes are only computed (the sizeonly passes) when opt->progress is set;
 * otherwise the size field is -1.
 *
 * NOTE(review): the condition guarding step 5's WAL section is not visible in
 * this excerpt; presumably it depends on opt->includewal -- confirm.
 */
122 perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
131 datadirpathlen = strlen(DataDir);
133 backup_started_in_recovery = RecoveryInProgress();
135 startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli,
138 * Once do_pg_start_backup has been called, ensure that any failure causes
139 * us to abort the backup so we don't "leak" a backup counter. For this reason,
140 * *all* functionality between do_pg_start_backup() and do_pg_stop_backup()
141 * should be inside the error cleanup block!
144 PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
146 List *tablespaces = NIL;
151 SendXlogRecPtrResult(startptr, starttli);
154 * Calculate the relative path of temporary statistics directory in order
155 * to skip the files which are located in that directory later.
157 if (is_absolute_path(pgstat_stat_directory) &&
158 strncmp(pgstat_stat_directory, DataDir, datadirpathlen) == 0)
159 statrelpath = psprintf("./%s", pgstat_stat_directory + datadirpathlen + 1);
160 else if (strncmp(pgstat_stat_directory, "./", 2) != 0)
161 statrelpath = psprintf("./%s", pgstat_stat_directory);
163 statrelpath = pgstat_stat_directory;
165 /* Collect information about all tablespaces */
166 while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
168 char fullpath[MAXPGPATH];
169 char linkpath[MAXPGPATH];
170 char *relpath = NULL;
173 /* Skip special stuff */
174 if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
177 snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
179 #if defined(HAVE_READLINK) || defined(WIN32)
180 rllen = readlink(fullpath, linkpath, sizeof(linkpath));
184 (errmsg("could not read symbolic link \"%s\": %m",
188 else if (rllen >= sizeof(linkpath))
191 (errmsg("symbolic link \"%s\" target is too long",
195 linkpath[rllen] = '\0';
198 * Relpath holds the relative path of the tablespace directory
199 * when it's located within PGDATA, or NULL if it's located
202 if (rllen > datadirpathlen &&
203 strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
204 IS_DIR_SEP(linkpath[datadirpathlen]))
205 relpath = linkpath + datadirpathlen + 1;
207 ti = palloc(sizeof(tablespaceinfo));
208 ti->oid = pstrdup(de->d_name);
209 ti->path = pstrdup(linkpath);
210 ti->rpath = relpath ? pstrdup(relpath) : NULL;
211 ti->size = opt->progress ? sendTablespace(fullpath, true) : -1;
212 tablespaces = lappend(tablespaces, ti);
216 * If the platform does not have symbolic links, it should not be
217 * possible to have tablespaces - clearly somebody else created
218 * them. Warn about it and ignore.
221 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
222 errmsg("tablespaces are not supported on this platform")));
226 /* Add a node for the base directory at the end */
227 ti = palloc0(sizeof(tablespaceinfo));
228 ti->size = opt->progress ? sendDir(".", 1, true, tablespaces) : -1;
229 tablespaces = lappend(tablespaces, ti);
231 /* Send tablespace header */
232 SendBackupHeader(tablespaces);
234 /* Setup and activate network throttling, if client requested it */
235 if (opt->maxrate > 0)
238 (int64) opt->maxrate * (int64) 1024 / THROTTLING_FREQUENCY;
241 * The minimum amount of time for throttling_sample bytes to be
244 elapsed_min_unit = USECS_PER_SEC / THROTTLING_FREQUENCY;
246 /* Enable throttling. */
247 throttling_counter = 0;
249 /* The 'real data' starts now (header was ignored). */
250 throttled_last = GetCurrentIntegerTimestamp();
254 /* Disable throttling. */
255 throttling_counter = -1;
258 /* Send off our tablespaces one by one */
259 foreach(lc, tablespaces)
261 tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
264 /* Send CopyOutResponse message */
265 pq_beginmessage(&buf, 'H');
266 pq_sendbyte(&buf, 0); /* overall format */
267 pq_sendint(&buf, 0, 2); /* natts */
270 if (ti->path == NULL)
274 /* In the main tar, include the backup_label first... */
275 sendFileWithContent(BACKUP_LABEL_FILE, labelfile);
277 /* ... then the bulk of the files ... */
278 sendDir(".", 1, false, tablespaces);
280 /* ... and pg_control after everything else. */
281 if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
283 (errcode_for_file_access(),
284 errmsg("could not stat control file \"%s\": %m",
285 XLOG_CONTROL_FILE)));
286 sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf, false);
289 sendTablespace(ti->path, false);
292 * If we're including WAL, and this is the main data directory we
293 * don't terminate the tar stream here. Instead, we will append
294 * the xlog files below and terminate it then. This is safe since
295 * the main data directory is always sent *last*.
297 if (opt->includewal && ti->path == NULL)
299 Assert(lnext(lc) == NULL);
302 pq_putemptymessage('c'); /* CopyDone */
305 PG_END_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
307 endptr = do_pg_stop_backup(labelfile, !opt->nowait, &endtli);
312 * We've left the last tar file "open", so we can now append the
313 * required WAL files to it.
315 char pathbuf[MAXPGPATH];
317 XLogSegNo startsegno;
320 List *historyFileList = NIL;
321 List *walFileList = NIL;
324 char firstoff[MAXFNAMELEN];
325 char lastoff[MAXFNAMELEN];
333 * I'd rather not worry about timelines here, so scan pg_xlog and
334 * include all WAL files in the range between 'startptr' and 'endptr',
335 * regardless of the timeline the file is stamped with. If there are
336 * some spurious WAL files belonging to timelines that don't belong in
337 * this server's history, they will be included too. Normally there
338 * shouldn't be such files, but if there are, there's little harm in
341 XLByteToSeg(startptr, startsegno);
342 XLogFileName(firstoff, ThisTimeLineID, startsegno);
343 XLByteToPrevSeg(endptr, endsegno);
344 XLogFileName(lastoff, ThisTimeLineID, endsegno);
346 dir = AllocateDir("pg_xlog");
349 (errmsg("could not open directory \"%s\": %m", "pg_xlog")));
350 while ((de = ReadDir(dir, "pg_xlog")) != NULL)
352 /* Does it look like a WAL segment, and is it in the range? */
/* The range comparisons start at offset 8 of each name, so the first eight characters are ignored. */
353 if (strlen(de->d_name) == 24 &&
354 strspn(de->d_name, "0123456789ABCDEF") == 24 &&
355 strcmp(de->d_name + 8, firstoff + 8) >= 0 &&
356 strcmp(de->d_name + 8, lastoff + 8) <= 0)
358 walFileList = lappend(walFileList, pstrdup(de->d_name));
360 /* Does it look like a timeline history file? */
361 else if (strlen(de->d_name) == 8 + strlen(".history") &&
362 strspn(de->d_name, "0123456789ABCDEF") == 8 &&
363 strcmp(de->d_name + 8, ".history") == 0)
365 historyFileList = lappend(historyFileList, pstrdup(de->d_name));
371 * Before we go any further, check that none of the WAL segments we
374 CheckXLogRemoved(startsegno, ThisTimeLineID);
377 * Put the WAL filenames into an array, and sort. We send the files in
378 * order from oldest to newest, to reduce the chance that a file is
379 * recycled before we get a chance to send it over.
381 nWalFiles = list_length(walFileList);
382 walFiles = palloc(nWalFiles * sizeof(char *));
384 foreach(lc, walFileList)
386 walFiles[i++] = lfirst(lc);
388 qsort(walFiles, nWalFiles, sizeof(char *), compareWalFileNames);
391 * There must be at least one xlog file in the pg_xlog directory,
392 * since we are doing backup-including-xlog.
396 (errmsg("could not find any WAL files")));
399 * Sanity check: the first and last segment should cover startptr and
400 * endptr, with no gaps in between.
402 XLogFromFileName(walFiles[0], &tli, &segno);
403 if (segno != startsegno)
405 char startfname[MAXFNAMELEN];
407 XLogFileName(startfname, ThisTimeLineID, startsegno);
409 (errmsg("could not find WAL file \"%s\"", startfname)));
/* Each sorted name must parse to the previous segment number or to that number plus one. */
411 for (i = 0; i < nWalFiles; i++)
413 XLogSegNo currsegno = segno;
414 XLogSegNo nextsegno = segno + 1;
416 XLogFromFileName(walFiles[i], &tli, &segno);
417 if (!(nextsegno == segno || currsegno == segno))
419 char nextfname[MAXFNAMELEN];
421 XLogFileName(nextfname, ThisTimeLineID, nextsegno);
423 (errmsg("could not find WAL file \"%s\"", nextfname)));
426 if (segno != endsegno)
428 char endfname[MAXFNAMELEN];
430 XLogFileName(endfname, ThisTimeLineID, endsegno);
432 (errmsg("could not find WAL file \"%s\"", endfname)));
435 /* Ok, we have everything we need. Send the WAL files. */
436 for (i = 0; i < nWalFiles; i++)
439 char buf[TAR_SEND_SIZE];
443 snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", walFiles[i]);
444 XLogFromFileName(walFiles[i], &tli, &segno);
446 fp = AllocateFile(pathbuf, "rb");
450 * Most likely reason for this is that the file was already
451 * removed by a checkpoint, so check for that to get a better
454 CheckXLogRemoved(segno, tli);
457 (errcode_for_file_access(),
458 errmsg("could not open file \"%s\": %m", pathbuf)));
461 if (fstat(fileno(fp), &statbuf) != 0)
463 (errcode_for_file_access(),
464 errmsg("could not stat file \"%s\": %m",
466 if (statbuf.st_size != XLogSegSize)
468 CheckXLogRemoved(segno, tli);
470 (errcode_for_file_access(),
471 errmsg("unexpected WAL file size \"%s\"", walFiles[i])));
474 /* send the WAL file itself */
475 _tarWriteHeader(pathbuf, NULL, &statbuf);
/* Each read is capped at the smaller of the buffer size and the bytes still missing from XLogSegSize. */
477 while ((cnt = fread(buf, 1, Min(sizeof(buf), XLogSegSize - len), fp)) > 0)
479 CheckXLogRemoved(segno, tli);
480 /* Send the chunk as a CopyData message */
481 if (pq_putmessage('d', buf, cnt))
483 (errmsg("base backup could not send data, aborting backup")));
488 if (len == XLogSegSize)
492 if (len != XLogSegSize)
494 CheckXLogRemoved(segno, tli);
496 (errcode_for_file_access(),
497 errmsg("unexpected WAL file size \"%s\"", walFiles[i])));
500 /* XLogSegSize is a multiple of 512, so no need for padding */
505 * Mark file as archived, otherwise files can get archived again
506 * after promotion of a new node. This is in line with
507 * walreceiver.c always doing a XLogArchiveForceDone() after a
/* pathbuf is overwritten here with the status file path and sent as an empty archive member. */
510 StatusFilePath(pathbuf, walFiles[i], ".done");
511 sendFileWithContent(pathbuf, "");
515 * Send timeline history files too. Only the latest timeline history
516 * file is required for recovery, and even that only if there happens
517 * to be a timeline switch in the first WAL segment that contains the
518 * checkpoint record, or if we're taking a base backup from a standby
519 * server and the target timeline changes while the backup is taken.
520 * But they are small and highly useful for debugging purposes, so
521 * better include them all, always.
523 foreach(lc, historyFileList)
525 char *fname = lfirst(lc);
527 snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", fname);
529 if (lstat(pathbuf, &statbuf) != 0)
531 (errcode_for_file_access(),
532 errmsg("could not stat file \"%s\": %m", pathbuf)));
534 sendFile(pathbuf, pathbuf, &statbuf, false);
536 /* unconditionally mark file as archived */
537 StatusFilePath(pathbuf, fname, ".done");
538 sendFileWithContent(pathbuf, "");
541 /* Send CopyDone message for the last tar file */
542 pq_putemptymessage('c');
544 SendXlogRecPtrResult(endptr, endtli);
548 * qsort comparison function, to compare log/seg portion of WAL segment
549 * filenames, ignoring the timeline portion.
/*
 * a, b: pointers to array elements of type char *, as handed over by qsort()
 * when sorting the walFiles array in perform_base_backup().
 * Returns the strcmp() ordering of the two names starting at offset 8.
 */
552 compareWalFileNames(const void *a, const void *b)
554 char *fna = *((char **) a);
555 char *fnb = *((char **) b);
/* Skip the first eight characters of each name before comparing. */
557 return strcmp(fna + 8, fnb + 8);
561 * Parse the base backup options passed down by the parser
/*
 * parse_basebackup_options
 *
 * options: list whose elements are read as DefElem nodes.
 * opt: output structure; it is zero-filled first, so every option that is
 * not mentioned keeps a zero/false/NULL value.
 *
 * Recognised option names are "label", "progress", "fast", "nowait", "wal"
 * and "max_rate"; any other name is reported with elog(ERROR).  A repeated
 * option is reported as a syntax error ("duplicate option").  max_rate must
 * lie within MAX_RATE_LOWER .. MAX_RATE_UPPER, otherwise a
 * numeric-value-out-of-range error is raised.  If no label was supplied the
 * label defaults to "base backup".
 *
 * NOTE(review): the o_* booleans presumably record which options were already
 * seen; the tests and assignments that use them are not visible in this
 * excerpt.
 */
564 parse_basebackup_options(List *options, basebackup_options *opt)
567 bool o_label = false;
568 bool o_progress = false;
570 bool o_nowait = false;
572 bool o_maxrate = false;
574 MemSet(opt, 0, sizeof(*opt));
575 foreach(lopt, options)
577 DefElem *defel = (DefElem *) lfirst(lopt);
579 if (strcmp(defel->defname, "label") == 0)
583 (errcode(ERRCODE_SYNTAX_ERROR),
584 errmsg("duplicate option \"%s\"", defel->defname)));
585 opt->label = strVal(defel->arg);
588 else if (strcmp(defel->defname, "progress") == 0)
592 (errcode(ERRCODE_SYNTAX_ERROR),
593 errmsg("duplicate option \"%s\"", defel->defname)));
594 opt->progress = true;
597 else if (strcmp(defel->defname, "fast") == 0)
601 (errcode(ERRCODE_SYNTAX_ERROR),
602 errmsg("duplicate option \"%s\"", defel->defname)));
603 opt->fastcheckpoint = true;
606 else if (strcmp(defel->defname, "nowait") == 0)
610 (errcode(ERRCODE_SYNTAX_ERROR),
611 errmsg("duplicate option \"%s\"", defel->defname)));
615 else if (strcmp(defel->defname, "wal") == 0)
619 (errcode(ERRCODE_SYNTAX_ERROR),
620 errmsg("duplicate option \"%s\"", defel->defname)));
621 opt->includewal = true;
624 else if (strcmp(defel->defname, "max_rate") == 0)
630 (errcode(ERRCODE_SYNTAX_ERROR),
631 errmsg("duplicate option \"%s\"", defel->defname)));
/* Range-check the value before it is narrowed to uint32 below. */
633 maxrate = intVal(defel->arg);
634 if (maxrate < MAX_RATE_LOWER || maxrate > MAX_RATE_UPPER)
636 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
637 errmsg("%d is outside the valid range for parameter \"%s\" (%d .. %d)",
638 (int) maxrate, "MAX_RATE", MAX_RATE_LOWER, MAX_RATE_UPPER)));
640 opt->maxrate = (uint32) maxrate;
644 elog(ERROR, "option \"%s\" not recognized",
/* The label pointer was zeroed by MemSet above, so NULL means that no label option was given. */
647 if (opt->label == NULL)
648 opt->label = "base backup";
653 * SendBaseBackup() - send a complete base backup.
655 * The function will put the system into backup mode like pg_start_backup()
656 * does, so that the backup is consistent even though we read directly from
657 * the filesystem, bypassing the buffer cache.
/*
 * cmd: the backup command; only cmd->options is read in the visible code.
 *
 * Parses the options, switches the WAL sender state to WALSNDSTATE_BACKUP,
 * optionally updates the process title, opens "pg_tblspc" and hands the
 * options and the directory handle to perform_base_backup().
 */
660 SendBaseBackup(BaseBackupCmd *cmd)
663 basebackup_options opt;
665 parse_basebackup_options(cmd->options, &opt);
667 WalSndSetState(WALSNDSTATE_BACKUP);
669 if (update_process_title)
/* Fixed 50-byte buffer: snprintf() bounds the write, so a long message is truncated. */
671 char activitymsg[50];
673 snprintf(activitymsg, sizeof(activitymsg), "sending backup \"%s\"",
675 set_ps_display(activitymsg, false);
678 /* Make sure we can open the directory with tablespaces in it */
679 dir = AllocateDir("pg_tblspc");
682 (errmsg("could not open directory \"%s\": %m", "pg_tblspc")));
684 perform_base_backup(&opt, dir);
/*
 * Append intval to buf as a text value: the number is formatted with
 * INT64_FORMAT, then a 4-byte length is written followed by exactly that
 * many characters (no terminating NUL is sent).
 * NOTE(review): the declaration of the scratch buffer "is" is not visible in
 * this excerpt; it must be large enough for any int64 in decimal.
 */
690 send_int8_string(StringInfoData *buf, int64 intval)
694 sprintf(is, INT64_FORMAT, intval);
695 pq_sendint(buf, strlen(is), 4);
696 pq_sendbytes(buf, is, strlen(is));
/*
 * SendBackupHeader
 *
 * tablespaces: list of tablespaceinfo entries.
 *
 * Emits a RowDescription with three columns ("spcoid", "spclocation",
 * "size"), then one DataRow per list entry, then a CommandComplete message.
 * For an entry whose path is NULL the first two columns are sent as NULL
 * (length -1); otherwise oid and path are sent as length-prefixed strings.
 * The size column carries ti->size divided by 1024.
 *
 * NOTE(review): the condition choosing between sending the size and sending
 * NULL for it is not visible in this excerpt -- confirm against the full
 * source.
 */
700 SendBackupHeader(List *tablespaces)
705 /* Construct and send the directory information */
706 pq_beginmessage(&buf, 'T'); /* RowDescription */
707 pq_sendint(&buf, 3, 2); /* 3 fields */
709 /* First field - spcoid */
710 pq_sendstring(&buf, "spcoid");
711 pq_sendint(&buf, 0, 4); /* table oid */
712 pq_sendint(&buf, 0, 2); /* attnum */
713 pq_sendint(&buf, OIDOID, 4); /* type oid */
714 pq_sendint(&buf, 4, 2); /* typlen */
715 pq_sendint(&buf, 0, 4); /* typmod */
716 pq_sendint(&buf, 0, 2); /* format code */
718 /* Second field - spclocation (same six-value layout as the first field) */
719 pq_sendstring(&buf, "spclocation");
720 pq_sendint(&buf, 0, 4); /* table oid */
721 pq_sendint(&buf, 0, 2); /* attnum */
722 pq_sendint(&buf, TEXTOID, 4); /* type oid */
723 pq_sendint(&buf, -1, 2); /* typlen */
724 pq_sendint(&buf, 0, 4); /* typmod */
725 pq_sendint(&buf, 0, 2); /* format code */
727 /* Third field - size */
728 pq_sendstring(&buf, "size");
729 pq_sendint(&buf, 0, 4); /* table oid */
730 pq_sendint(&buf, 0, 2); /* attnum */
731 pq_sendint(&buf, INT8OID, 4); /* type oid */
732 pq_sendint(&buf, 8, 2); /* typlen */
733 pq_sendint(&buf, 0, 4); /* typmod */
734 pq_sendint(&buf, 0, 2); /* format code */
737 foreach(lc, tablespaces)
739 tablespaceinfo *ti = lfirst(lc);
741 /* Send one datarow message */
742 pq_beginmessage(&buf, 'D');
743 pq_sendint(&buf, 3, 2); /* number of columns */
/* A NULL path marks the entry that perform_base_backup() appends for the main data directory. */
744 if (ti->path == NULL)
746 pq_sendint(&buf, -1, 4); /* Length = -1 ==> NULL */
747 pq_sendint(&buf, -1, 4);
751 pq_sendint(&buf, strlen(ti->oid), 4); /* length */
752 pq_sendbytes(&buf, ti->oid, strlen(ti->oid));
753 pq_sendint(&buf, strlen(ti->path), 4); /* length */
754 pq_sendbytes(&buf, ti->path, strlen(ti->path));
/* The size is reported in units of 1024 bytes (integer division). */
757 send_int8_string(&buf, ti->size / 1024);
759 pq_sendint(&buf, -1, 4); /* NULL */
764 /* Send a CommandComplete message */
765 pq_puttextmessage('C', "SELECT");
769 * Send a single resultset containing just a single
770 * XLogRecPtr record (in text format)
/*
 * SendXlogRecPtrResult
 *
 * ptr: WAL position, sent as text in the form "%X/%X" built from its upper
 * and lower 32 bits.
 * tli: timeline id, sent as text formatted with "%u".
 *
 * Emits a RowDescription with the columns "recptr" (TEXTOID) and "tli"
 * (INT8OID), one DataRow holding both values as length-prefixed strings, and
 * a CommandComplete message.
 *
 * NOTE(review): "thory" in the remark about int8 below is a typo for
 * "theory"; that line lost its comment delimiters in this excerpt, so it is
 * left untouched here.
 */
773 SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli)
776 char str[MAXFNAMELEN];
778 pq_beginmessage(&buf, 'T'); /* RowDescription */
779 pq_sendint(&buf, 2, 2); /* 2 fields */
782 pq_sendstring(&buf, "recptr");
783 pq_sendint(&buf, 0, 4); /* table oid */
784 pq_sendint(&buf, 0, 2); /* attnum */
785 pq_sendint(&buf, TEXTOID, 4); /* type oid */
786 pq_sendint(&buf, -1, 2); /* typlen */
787 pq_sendint(&buf, 0, 4); /* typmod */
788 pq_sendint(&buf, 0, 2); /* format code */
790 pq_sendstring(&buf, "tli");
791 pq_sendint(&buf, 0, 4); /* table oid */
792 pq_sendint(&buf, 0, 2); /* attnum */
795 * int8 may seem like a surprising data type for this, but in thory int4
796 * would not be wide enough for this, as TimeLineID is unsigned.
798 pq_sendint(&buf, INT8OID, 4); /* type oid */
799 pq_sendint(&buf, -1, 2); /* typlen */
800 pq_sendint(&buf, 0, 4); /* typmod */
801 pq_sendint(&buf, 0, 2); /* format code */
805 pq_beginmessage(&buf, 'D');
806 pq_sendint(&buf, 2, 2); /* number of columns */
/* str is reused for both columns; each value is sent before the buffer is overwritten. */
808 snprintf(str, sizeof(str), "%X/%X", (uint32) (ptr >> 32), (uint32) ptr);
809 pq_sendint(&buf, strlen(str), 4); /* length */
810 pq_sendbytes(&buf, str, strlen(str));
812 snprintf(str, sizeof(str), "%u", tli);
813 pq_sendint(&buf, strlen(str), 4); /* length */
814 pq_sendbytes(&buf, str, strlen(str));
817 /* Send a CommandComplete message */
818 pq_puttextmessage('C', "SELECT");
822 * Inject a file with given name and content in the output tar stream.
/*
 * sendFileWithContent
 *
 * filename: member name written into the archive header.
 * content: NUL-terminated text; strlen(content) bytes are sent, so the
 * terminator itself is not part of the member.
 *
 * A stat structure is filled in by hand (current time as mtime, owner
 * read/write mode, size = content length; uid and gid come from geteuid() and
 * getegid() where those lines apply), a header is written for it, the content
 * is sent as one CopyData message and the member is padded to a multiple of
 * 512 bytes.  Besides the backup label, perform_base_backup() also uses this
 * with empty content for the ".done" status files.
 */
825 sendFileWithContent(const char *filename, const char *content)
831 len = strlen(content);
834 * Construct a stat struct for the backup_label file we're injecting in
837 /* Windows doesn't have the concept of uid and gid */
842 statbuf.st_uid = geteuid();
843 statbuf.st_gid = getegid();
845 statbuf.st_mtime = time(NULL);
846 statbuf.st_mode = S_IRUSR | S_IWUSR;
847 statbuf.st_size = len;
849 _tarWriteHeader(filename, NULL, &statbuf);
850 /* Send the contents as a CopyData message */
851 pq_putmessage('d', content, len);
853 /* Pad to 512 byte boundary, per tar format requirements */
/* (len + 511) & ~511 rounds len up to the next multiple of 512; pad is the difference. */
854 pad = ((len + 511) & ~511) - len;
860 pq_putmessage('d', buf, pad);
865 * Include the tablespace directory pointed to by 'path' in the output tar
866 * stream. If 'sizeonly' is true, we just calculate a total length and return
867 * it, without actually sending anything.
869 * Only used to send auxiliary tablespaces, not PGDATA.
/*
 * sendTablespace
 *
 * path: location of the tablespace; the directory actually sent is
 * path/TABLESPACE_VERSION_DIRECTORY.
 * sizeonly: forwarded to sendDir().
 *
 * The running size starts at 512 for the directory header and grows by
 * whatever sendDir() reports for the version directory.
 * NOTE(review): the return statement and the sizeonly guard around the
 * header write are not visible in this excerpt; presumably the accumulated
 * size is returned.
 */
872 sendTablespace(char *path, bool sizeonly)
875 char pathbuf[MAXPGPATH];
879 * 'path' points to the tablespace location, but we only want to include
880 * the version directory in it that belongs to us.
882 snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path,
883 TABLESPACE_VERSION_DIRECTORY);
886 * Store a directory entry in the tar file so we get the permissions
889 if (lstat(pathbuf, &statbuf) != 0)
893 (errcode_for_file_access(),
894 errmsg("could not stat file or directory \"%s\": %m",
897 /* If the tablespace went away while scanning, it's no error. */
901 _tarWriteHeader(TABLESPACE_VERSION_DIRECTORY, NULL, &statbuf);
902 size = 512; /* Size of the header just added */
904 /* Send all the files in the tablespace version directory */
/* basepathlen is strlen(path), so member names are formed relative to the tablespace location; no tablespace list is passed. */
905 size += sendDir(pathbuf, strlen(path), sizeonly, NIL);
911 * Include all files from the given directory in the output tar stream. If
912 * 'sizeonly' is true, we just calculate a total length and return it, without
913 * actually sending anything.
915 * Omit any directory in the tablespaces list, to avoid backing up
916 * tablespaces twice when they were created inside PGDATA.
/*
 * sendDir
 *
 * path: directory to scan; every entry is addressed as path/name.
 * basepathlen: length of the prefix that, together with one separator, is
 * cut off each full path to form the archive member name.
 * sizeonly: forwarded to nested sendDir() calls and consulted when
 * accounting for regular files.
 * tablespaces: list of tablespaceinfo; a subdirectory whose path without the
 * leading "./" equals an entry's rpath is not descended into.
 *
 * Entries skipped outright: "." and "..", names starting with
 * PG_TEMP_FILE_PREFIX, the auto-conf temporary file, the backup label file,
 * ./postmaster.pid, ./postmaster.opts and ./global/pg_control.  The
 * statistics temp directory, pg_replslot and ./pg_xlog get a header but are
 * not descended into.  Symbolic links are followed only directly under
 * ./pg_tblspc; other entries that are neither directories nor regular files
 * are reported as skipped special files.
 *
 * The running size counts 512 bytes per header plus every regular file's
 * size rounded up to a multiple of 512.
 * NOTE(review): the sizeonly guards around the header writes and the return
 * statement are not visible in this excerpt; presumably the accumulated size
 * is returned.
 */
919 sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
923 char pathbuf[MAXPGPATH];
927 dir = AllocateDir(path);
928 while ((de = ReadDir(dir, path)) != NULL)
930 /* Skip special stuff */
931 if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
934 /* Skip temporary files */
935 if (strncmp(de->d_name,
937 strlen(PG_TEMP_FILE_PREFIX)) == 0)
940 /* skip auto conf temporary file */
/* sizeof() includes the terminating NUL, so the length covers the name, ".tmp" and the terminator: an exact-name match. */
941 if (strncmp(de->d_name,
942 PG_AUTOCONF_FILENAME ".tmp",
943 sizeof(PG_AUTOCONF_FILENAME) + 4) == 0)
947 * If there's a backup_label file, it belongs to a backup started by
948 * the user with pg_start_backup(). It is *not* correct for this
949 * backup, our backup_label is injected into the tar separately.
951 if (strcmp(de->d_name, BACKUP_LABEL_FILE) == 0)
955 * Check if the postmaster has signaled us to exit, and abort with an
956 * error in that case. The error handler further up will call
957 * do_pg_abort_backup() for us. Also check that if the backup was
958 * started while still in recovery, the server wasn't promoted.
959 * dp_pg_stop_backup() will check that too, but it's better to stop
960 * the backup early than continue to the end and fail there.
/* NOTE(review): "dp_pg_stop_backup" in the remark above looks like a typo for do_pg_stop_backup, the function called in perform_base_backup(). */
962 CHECK_FOR_INTERRUPTS();
963 if (RecoveryInProgress() != backup_started_in_recovery)
965 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
966 errmsg("the standby was promoted during online backup"),
967 errhint("This means that the backup being taken is corrupt "
968 "and should not be used. "
969 "Try taking another online backup.")));
971 snprintf(pathbuf, MAXPGPATH, "%s/%s", path, de->d_name);
973 /* Skip postmaster.pid and postmaster.opts in the data directory */
974 if (strcmp(pathbuf, "./postmaster.pid") == 0 ||
975 strcmp(pathbuf, "./postmaster.opts") == 0)
978 /* Skip pg_control here so that it can be backed up last */
979 if (strcmp(pathbuf, "./global/pg_control") == 0)
982 if (lstat(pathbuf, &statbuf) != 0)
986 (errcode_for_file_access(),
987 errmsg("could not stat file or directory \"%s\": %m",
990 /* If the file went away while scanning, it's no error. */
995 * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped
996 * even when stats_temp_directory is set because PGSS_TEXT_FILE is
997 * always created there.
999 if ((statrelpath != NULL && strcmp(pathbuf, statrelpath) == 0) ||
1000 strncmp(de->d_name, PG_STAT_TMP_DIR, strlen(PG_STAT_TMP_DIR)) == 0)
/* pathbuf + basepathlen + 1 drops the base path and the separator after it, leaving the member name. */
1003 _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf);
1009 * Skip pg_replslot, not useful to copy. But include it as an empty
1010 * directory anyway, so we get permissions right.
1012 if (strcmp(de->d_name, "pg_replslot") == 0)
1015 _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf);
1016 size += 512; /* Size of the header just added */
1021 * We can skip pg_xlog, the WAL segments need to be fetched from the
1022 * WAL archive anyway. But include it as an empty directory anyway, so
1023 * we get permissions right.
1025 if (strcmp(pathbuf, "./pg_xlog") == 0)
1029 /* If pg_xlog is a symlink, write it as a directory anyway */
1031 if (S_ISLNK(statbuf.st_mode))
1033 if (pgwin32_is_junction(pathbuf))
1035 statbuf.st_mode = S_IFDIR | S_IRWXU;
1036 _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf);
1038 size += 512; /* Size of the header just added */
1041 * Also send archive_status directory (by hackishly reusing
1042 * statbuf from above ...).
1045 _tarWriteHeader("./pg_xlog/archive_status", NULL, &statbuf);
1046 size += 512; /* Size of the header just added */
1048 continue; /* don't recurse into pg_xlog */
1051 /* Allow symbolic links in pg_tblspc only */
1052 if (strcmp(path, "./pg_tblspc") == 0 &&
1054 S_ISLNK(statbuf.st_mode)
1056 pgwin32_is_junction(pathbuf)
1060 #if defined(HAVE_READLINK) || defined(WIN32)
1061 char linkpath[MAXPGPATH];
1064 rllen = readlink(pathbuf, linkpath, sizeof(linkpath));
1067 (errcode_for_file_access(),
1068 errmsg("could not read symbolic link \"%s\": %m",
1070 if (rllen >= sizeof(linkpath))
1072 (errmsg("symbolic link \"%s\" target is too long",
/* readlink() does not terminate the buffer, so the terminator is added by hand. */
1074 linkpath[rllen] = '\0';
1077 _tarWriteHeader(pathbuf + basepathlen + 1, linkpath, &statbuf);
1078 size += 512; /* Size of the header just added */
1082 * If the platform does not have symbolic links, it should not be
1083 * possible to have tablespaces - clearly somebody else created
1084 * them. Warn about it and ignore.
1087 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1088 errmsg("tablespaces are not supported on this platform")));
1090 #endif /* HAVE_READLINK */
1092 else if (S_ISDIR(statbuf.st_mode))
1094 bool skip_this_dir = false;
1098 * Store a directory entry in the tar file so we can get the
1099 * permissions right.
1102 _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf);
1103 size += 512; /* Size of the header just added */
1106 * Call ourselves recursively for a directory, unless it happens
1107 * to be a separate tablespace located within PGDATA.
1109 foreach(lc, tablespaces)
1111 tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
1114 * ti->rpath is the tablespace relative path within PGDATA, or
1115 * NULL if the tablespace has been properly located somewhere
1118 * Skip past the leading "./" in pathbuf when comparing.
1120 if (ti->rpath && strcmp(ti->rpath, pathbuf + 2) == 0)
1122 skip_this_dir = true;
1127 size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces);
1129 else if (S_ISREG(statbuf.st_mode))
1134 sent = sendFile(pathbuf, pathbuf + basepathlen + 1, &statbuf,
/* In size-only mode the file is counted as well. */
1137 if (sent || sizeonly)
1139 /* Add size, rounded up to 512byte block */
1140 size += ((statbuf.st_size + 511) & ~511);
1141 size += 512; /* Size of the header of the file */
1146 (errmsg("skipping special file \"%s\"", pathbuf)));
1153 * Functions for handling tar file format
1155 * Copied from pg_dump, but modified to work with libpq for sending
1160 * Maximum file size for a tar member: The limit inherent in the
1161 * format is 2^33-1 bytes (nearly 8 GB). But we don't want to exceed
1162 * what we can represent in pgoff_t.
/* Value is 2^k - 1, where k is the smaller of 33 and (bit width of pgoff_t, taken as sizeof * 8) minus one. */
1164 #define MAX_TAR_MEMBER_FILELEN (((int64) 1 << Min(33, sizeof(pgoff_t)*8 - 1)) - 1)
1167 * Given the member, write the TAR header & send the file.
1169 * If 'missing_ok' is true, will not throw an error if the file is not found.
1171 * Returns true if the file was successfully sent, false if 'missing_ok',
1172 * and the file did not exist.
/*
 * sendFile
 *
 * readfilename: path that is opened for reading (binary mode).
 * tarfilename: member name written into the archive header.
 * statbuf: stat data of the file; st_size fixes the member length.
 * missing_ok: when true, an ENOENT failure of the open is tolerated instead
 * of being reported as an error.
 *
 * The member written is exactly statbuf->st_size bytes long: reads are capped
 * at the bytes still missing from that size, and a file that turns out to be
 * shorter is topped up with zero bytes.  Afterwards the member is padded to a
 * multiple of 512 bytes.  Files larger than MAX_TAR_MEMBER_FILELEN are
 * rejected before anything is written.
 *
 * NOTE(review): in the visible lines the result of pq_putmessage() is checked
 * only in the main read loop, not for the zero-fill and padding messages.
 */
1175 sendFile(char *readfilename, char *tarfilename, struct stat * statbuf,
1179 char buf[TAR_SEND_SIZE];
1184 fp = AllocateFile(readfilename, "rb");
/* Only a file that does not exist is excused, and only if the caller asked for that. */
1187 if (errno == ENOENT && missing_ok)
1190 (errcode_for_file_access(),
1191 errmsg("could not open file \"%s\": %m", readfilename)));
1195 * Some compilers will throw a warning knowing this test can never be true
1196 * because pgoff_t can't exceed the compared maximum on their platform.
1198 if (statbuf->st_size > MAX_TAR_MEMBER_FILELEN)
1200 (errmsg("archive member \"%s\" too large for tar format",
1203 _tarWriteHeader(tarfilename, NULL, statbuf);
1205 while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0)
1207 /* Send the chunk as a CopyData message */
1208 if (pq_putmessage('d', buf, cnt))
1210 (errmsg("base backup could not send data, aborting backup")));
1215 if (len >= statbuf->st_size)
1218 * Reached end of file. The file could be longer, if it was
1219 * extended while we were sending it, but for a base backup we can
1220 * ignore such extended data. It will be restored from WAL.
1226 /* If the file was truncated while we were sending it, pad it with zeros */
1227 if (len < statbuf->st_size)
1229 MemSet(buf, 0, sizeof(buf));
1230 while (len < statbuf->st_size)
1232 cnt = Min(sizeof(buf), statbuf->st_size - len);
1233 pq_putmessage('d', buf, cnt);
1240 * Pad to 512 byte boundary, per tar format requirements. (This small
1241 * piece of data is probably not worth throttling.)
1243 pad = ((len + 511) & ~511) - len;
1246 MemSet(buf, 0, pad);
1247 pq_putmessage('d', buf, pad);
/*
 * Build an archive header for one member with tarCreateHeader() and send it
 * as a single 512-byte CopyData message.
 *
 * filename: member name.
 * linktarget: link target, or NULL (callers pass NULL for everything except
 * the symbolic links found in pg_tblspc).
 * statbuf: supplies size, mode, uid and gid for the header.
 * NOTE(review): the final tarCreateHeader() argument and the declaration of
 * the header buffer "h" are not visible in this excerpt.
 */
1257 _tarWriteHeader(const char *filename, const char *linktarget,
1258 struct stat * statbuf)
1262 tarCreateHeader(h, filename, linktarget, statbuf->st_size,
1263 statbuf->st_mode, statbuf->st_uid, statbuf->st_gid,
1266 pq_putmessage('d', h, 512);
1270 * Increment the network transfer counter by the given number of bytes,
1271 * and sleep if necessary to comply with the requested network transfer
/*
 * throttle
 *
 * increment: number of bytes just transferred; it is added to
 * throttling_counter.
 *
 * Nothing further happens while the counter is negative or still below
 * throttling_sample.  Otherwise the time that should have passed for the
 * accumulated data (elapsed_min_unit per whole sample) is compared with the
 * time actually passed since throttled_last, and the difference is waited
 * out on the WAL sender latch.  Afterwards only the remainder below one
 * sample is kept in the counter, and throttled_last is moved forward.
 *
 * NOTE(review): the early returns, the "sleep > 0" test and the else
 * branches are not visible in this excerpt.
 */
1275 throttle(size_t increment)
/* A negative counter is the state perform_base_backup() sets when no rate limit was requested. */
1282 if (throttling_counter < 0)
1285 throttling_counter += increment;
/* The counter is signed and the sample size unsigned; the counter was non-negative before the increment was added. */
1286 if (throttling_counter < throttling_sample)
1289 /* Time elapsed since the last measurement (and possible wake up). */
1290 elapsed = GetCurrentIntegerTimestamp() - throttled_last;
1291 /* How much should have elapsed at minimum? */
/* Integer division: only whole samples contribute to the minimum time. */
1292 elapsed_min = elapsed_min_unit * (throttling_counter / throttling_sample);
1293 sleep = elapsed_min - elapsed;
1294 /* Only sleep if the transfer is faster than it should be. */
1297 ResetLatch(&MyWalSnd->latch);
1300 * (TAR_SEND_SIZE / throttling_sample * elapsed_min_unit) should be
1301 * the maximum time to sleep. Thus the cast to long is safe.
/* sleep derives from USECS_PER_SEC-based units; dividing by 1000 presumably yields the milliseconds WaitLatch() expects -- confirm. */
1303 wait_result = WaitLatch(&MyWalSnd->latch,
1304 WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
1305 (long) (sleep / 1000));
1310 * The actual transfer rate is below the limit. A negative value
1311 * would distort the adjustment of throttled_last.
1318 * Only a whole multiple of throttling_sample was processed. The rest will
1319 * be done during the next call of this function.
1321 throttling_counter %= throttling_sample;
1323 /* Once the (possible) sleep has ended, new period starts. */
1324 if (wait_result & WL_TIMEOUT)
1325 throttled_last += elapsed + sleep;
1327 /* Sleep was necessary but might have been interrupted. */
1328 throttled_last = GetCurrentIntegerTimestamp();