]> granicus.if.org Git - postgresql/blob - src/backend/replication/basebackup.c
Prevent WAL files created by pg_basebackup -x/X from being archived again.
[postgresql] / src / backend / replication / basebackup.c
1 /*-------------------------------------------------------------------------
2  *
3  * basebackup.c
4  *        code for taking a base backup and streaming it to a standby
5  *
6  * Portions Copyright (c) 2010-2014, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *        src/backend/replication/basebackup.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14
15 #include <sys/types.h>
16 #include <sys/stat.h>
17 #include <unistd.h>
18 #include <time.h>
19
20 #include "access/xlog_internal.h"               /* for pg_start/stop_backup */
21 #include "catalog/catalog.h"
22 #include "catalog/pg_type.h"
23 #include "lib/stringinfo.h"
24 #include "libpq/libpq.h"
25 #include "libpq/pqformat.h"
26 #include "miscadmin.h"
27 #include "nodes/pg_list.h"
28 #include "pgtar.h"
29 #include "pgstat.h"
30 #include "replication/basebackup.h"
31 #include "replication/walsender.h"
32 #include "replication/walsender_private.h"
33 #include "storage/fd.h"
34 #include "storage/ipc.h"
35 #include "utils/builtins.h"
36 #include "utils/elog.h"
37 #include "utils/ps_status.h"
38 #include "utils/timestamp.h"
39
40
41 typedef struct
42 {
43         const char *label;
44         bool            progress;
45         bool            fastcheckpoint;
46         bool            nowait;
47         bool            includewal;
48         uint32          maxrate;
49 } basebackup_options;
50
51
52 static int64 sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces);
53 static int64 sendTablespace(char *path, bool sizeonly);
54 static bool sendFile(char *readfilename, char *tarfilename,
55                  struct stat * statbuf, bool missing_ok);
56 static void sendFileWithContent(const char *filename, const char *content);
57 static void _tarWriteHeader(const char *filename, const char *linktarget,
58                                 struct stat * statbuf);
59 static void send_int8_string(StringInfoData *buf, int64 intval);
60 static void SendBackupHeader(List *tablespaces);
61 static void base_backup_cleanup(int code, Datum arg);
62 static void perform_base_backup(basebackup_options *opt, DIR *tblspcdir);
63 static void parse_basebackup_options(List *options, basebackup_options *opt);
64 static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
65 static int      compareWalFileNames(const void *a, const void *b);
66 static void throttle(size_t increment);
67
/*
 * Was the backup currently in-progress initiated in recovery mode?
 *
 * Set from RecoveryInProgress() at the start of perform_base_backup().
 */
static bool backup_started_in_recovery = false;

/*
 * Relative path of temporary statistics directory, in "./..." form.
 *
 * Computed from pgstat_stat_directory by perform_base_backup().
 */
static char *statrelpath = NULL;

/*
 * Size of each block sent into the tar stream for larger files.
 */
#define TAR_SEND_SIZE 32768

/*
 * How frequently to throttle, as a fraction of the specified rate-second.
 */
#define THROTTLING_FREQUENCY	8

/*
 * The actual number of bytes, transfer of which may cause sleep.
 *
 * Set to (max rate * 1024) / THROTTLING_FREQUENCY when throttling is
 * requested.
 */
static uint64 throttling_sample;

/*
 * Amount of data already transferred but not yet throttled.
 *
 * perform_base_backup() sets this to 0 to enable throttling and to -1 to
 * disable it.
 */
static int64 throttling_counter;

/*
 * The minimum time required to transfer throttling_sample bytes.
 *
 * Set to USECS_PER_SEC / THROTTLING_FREQUENCY.
 */
static int64 elapsed_min_unit;

/*
 * The last check of the transfer rate.
 *
 * Initialized from GetCurrentIntegerTimestamp() right before the tablespace
 * data starts to flow.
 */
static int64 throttled_last;
95
96 typedef struct
97 {
98         char       *oid;
99         char       *path;
100         char       *rpath;                      /* relative path within PGDATA, or NULL */
101         int64           size;
102 } tablespaceinfo;
103
104
/*
 * Called when ERROR or FATAL happens in perform_base_backup() after
 * we have started the backup - make sure we end it!
 *
 * Registered through PG_ENSURE_ERROR_CLEANUP(); neither "code" nor "arg"
 * is looked at, the backup is simply aborted.
 */
static void
base_backup_cleanup(int code, Datum arg)
{
	do_pg_abort_backup();
}
114
115 /*
116  * Actually do a base backup for the specified tablespaces.
117  *
118  * This is split out mainly to avoid complaints about "variable might be
119  * clobbered by longjmp" from stupider versions of gcc.
120  */
121 static void
122 perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
123 {
124         XLogRecPtr      startptr;
125         TimeLineID      starttli;
126         XLogRecPtr      endptr;
127         TimeLineID      endtli;
128         char       *labelfile;
129         int                     datadirpathlen;
130
131         datadirpathlen = strlen(DataDir);
132
133         backup_started_in_recovery = RecoveryInProgress();
134
135         startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli,
136                                                                   &labelfile);
137         /*
138          * Once do_pg_start_backup has been called, ensure that any failure causes
139          * us to abort the backup so we don't "leak" a backup counter. For this reason,
140          * *all* functionality between do_pg_start_backup() and do_pg_stop_backup()
141          * should be inside the error cleanup block!
142          */
143
144         PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
145         {
146                 List       *tablespaces = NIL;
147                 ListCell   *lc;
148                 struct dirent *de;
149                 tablespaceinfo *ti;
150
151                 SendXlogRecPtrResult(startptr, starttli);
152
153                 /*
154                  * Calculate the relative path of temporary statistics directory in order
155                  * to skip the files which are located in that directory later.
156                  */
157                 if (is_absolute_path(pgstat_stat_directory) &&
158                         strncmp(pgstat_stat_directory, DataDir, datadirpathlen) == 0)
159                         statrelpath = psprintf("./%s", pgstat_stat_directory + datadirpathlen + 1);
160                 else if (strncmp(pgstat_stat_directory, "./", 2) != 0)
161                         statrelpath = psprintf("./%s", pgstat_stat_directory);
162                 else
163                         statrelpath = pgstat_stat_directory;
164
165                 /* Collect information about all tablespaces */
166                 while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
167                 {
168                         char            fullpath[MAXPGPATH];
169                         char            linkpath[MAXPGPATH];
170                         char       *relpath = NULL;
171                         int                     rllen;
172
173                         /* Skip special stuff */
174                         if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
175                                 continue;
176
177                         snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
178
179 #if defined(HAVE_READLINK) || defined(WIN32)
180                         rllen = readlink(fullpath, linkpath, sizeof(linkpath));
181                         if (rllen < 0)
182                         {
183                                 ereport(WARNING,
184                                                 (errmsg("could not read symbolic link \"%s\": %m",
185                                                                 fullpath)));
186                                 continue;
187                         }
188                         else if (rllen >= sizeof(linkpath))
189                         {
190                                 ereport(WARNING,
191                                                 (errmsg("symbolic link \"%s\" target is too long",
192                                                                 fullpath)));
193                                 continue;
194                         }
195                         linkpath[rllen] = '\0';
196
197                         /*
198                          * Relpath holds the relative path of the tablespace directory
199                          * when it's located within PGDATA, or NULL if it's located
200                          * elsewhere.
201                          */
202                         if (rllen > datadirpathlen &&
203                                 strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
204                                 IS_DIR_SEP(linkpath[datadirpathlen]))
205                                 relpath = linkpath + datadirpathlen + 1;
206
207                         ti = palloc(sizeof(tablespaceinfo));
208                         ti->oid = pstrdup(de->d_name);
209                         ti->path = pstrdup(linkpath);
210                         ti->rpath = relpath ? pstrdup(relpath) : NULL;
211                         ti->size = opt->progress ? sendTablespace(fullpath, true) : -1;
212                         tablespaces = lappend(tablespaces, ti);
213 #else
214
215                         /*
216                          * If the platform does not have symbolic links, it should not be
217                          * possible to have tablespaces - clearly somebody else created
218                          * them. Warn about it and ignore.
219                          */
220                         ereport(WARNING,
221                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
222                                   errmsg("tablespaces are not supported on this platform")));
223 #endif
224                 }
225
226                 /* Add a node for the base directory at the end */
227                 ti = palloc0(sizeof(tablespaceinfo));
228                 ti->size = opt->progress ? sendDir(".", 1, true, tablespaces) : -1;
229                 tablespaces = lappend(tablespaces, ti);
230
231                 /* Send tablespace header */
232                 SendBackupHeader(tablespaces);
233
234                 /* Setup and activate network throttling, if client requested it */
235                 if (opt->maxrate > 0)
236                 {
237                         throttling_sample =
238                                 (int64) opt->maxrate * (int64) 1024 / THROTTLING_FREQUENCY;
239
240                         /*
241                          * The minimum amount of time for throttling_sample bytes to be
242                          * transfered.
243                          */
244                         elapsed_min_unit = USECS_PER_SEC / THROTTLING_FREQUENCY;
245
246                         /* Enable throttling. */
247                         throttling_counter = 0;
248
249                         /* The 'real data' starts now (header was ignored). */
250                         throttled_last = GetCurrentIntegerTimestamp();
251                 }
252                 else
253                 {
254                         /* Disable throttling. */
255                         throttling_counter = -1;
256                 }
257
258                 /* Send off our tablespaces one by one */
259                 foreach(lc, tablespaces)
260                 {
261                         tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
262                         StringInfoData buf;
263
264                         /* Send CopyOutResponse message */
265                         pq_beginmessage(&buf, 'H');
266                         pq_sendbyte(&buf, 0);           /* overall format */
267                         pq_sendint(&buf, 0, 2);         /* natts */
268                         pq_endmessage(&buf);
269
270                         if (ti->path == NULL)
271                         {
272                                 struct stat statbuf;
273
274                                 /* In the main tar, include the backup_label first... */
275                                 sendFileWithContent(BACKUP_LABEL_FILE, labelfile);
276
277                                 /* ... then the bulk of the files ... */
278                                 sendDir(".", 1, false, tablespaces);
279
280                                 /* ... and pg_control after everything else. */
281                                 if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
282                                         ereport(ERROR,
283                                                         (errcode_for_file_access(),
284                                                          errmsg("could not stat control file \"%s\": %m",
285                                                                         XLOG_CONTROL_FILE)));
286                                 sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf, false);
287                         }
288                         else
289                                 sendTablespace(ti->path, false);
290
291                         /*
292                          * If we're including WAL, and this is the main data directory we
293                          * don't terminate the tar stream here. Instead, we will append
294                          * the xlog files below and terminate it then. This is safe since
295                          * the main data directory is always sent *last*.
296                          */
297                         if (opt->includewal && ti->path == NULL)
298                         {
299                                 Assert(lnext(lc) == NULL);
300                         }
301                         else
302                                 pq_putemptymessage('c');                /* CopyDone */
303                 }
304         }
305         PG_END_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
306
307         endptr = do_pg_stop_backup(labelfile, !opt->nowait, &endtli);
308
309         if (opt->includewal)
310         {
311                 /*
312                  * We've left the last tar file "open", so we can now append the
313                  * required WAL files to it.
314                  */
315                 char            pathbuf[MAXPGPATH];
316                 XLogSegNo       segno;
317                 XLogSegNo       startsegno;
318                 XLogSegNo       endsegno;
319                 struct stat statbuf;
320                 List       *historyFileList = NIL;
321                 List       *walFileList = NIL;
322                 char      **walFiles;
323                 int                     nWalFiles;
324                 char            firstoff[MAXFNAMELEN];
325                 char            lastoff[MAXFNAMELEN];
326                 DIR                *dir;
327                 struct dirent *de;
328                 int                     i;
329                 ListCell   *lc;
330                 TimeLineID      tli;
331
332                 /*
333                  * I'd rather not worry about timelines here, so scan pg_xlog and
334                  * include all WAL files in the range between 'startptr' and 'endptr',
335                  * regardless of the timeline the file is stamped with. If there are
336                  * some spurious WAL files belonging to timelines that don't belong in
337                  * this server's history, they will be included too. Normally there
338                  * shouldn't be such files, but if there are, there's little harm in
339                  * including them.
340                  */
341                 XLByteToSeg(startptr, startsegno);
342                 XLogFileName(firstoff, ThisTimeLineID, startsegno);
343                 XLByteToPrevSeg(endptr, endsegno);
344                 XLogFileName(lastoff, ThisTimeLineID, endsegno);
345
346                 dir = AllocateDir("pg_xlog");
347                 if (!dir)
348                         ereport(ERROR,
349                                  (errmsg("could not open directory \"%s\": %m", "pg_xlog")));
350                 while ((de = ReadDir(dir, "pg_xlog")) != NULL)
351                 {
352                         /* Does it look like a WAL segment, and is it in the range? */
353                         if (strlen(de->d_name) == 24 &&
354                                 strspn(de->d_name, "0123456789ABCDEF") == 24 &&
355                                 strcmp(de->d_name + 8, firstoff + 8) >= 0 &&
356                                 strcmp(de->d_name + 8, lastoff + 8) <= 0)
357                         {
358                                 walFileList = lappend(walFileList, pstrdup(de->d_name));
359                         }
360                         /* Does it look like a timeline history file? */
361                         else if (strlen(de->d_name) == 8 + strlen(".history") &&
362                                          strspn(de->d_name, "0123456789ABCDEF") == 8 &&
363                                          strcmp(de->d_name + 8, ".history") == 0)
364                         {
365                                 historyFileList = lappend(historyFileList, pstrdup(de->d_name));
366                         }
367                 }
368                 FreeDir(dir);
369
370                 /*
371                  * Before we go any further, check that none of the WAL segments we
372                  * need were removed.
373                  */
374                 CheckXLogRemoved(startsegno, ThisTimeLineID);
375
376                 /*
377                  * Put the WAL filenames into an array, and sort. We send the files in
378                  * order from oldest to newest, to reduce the chance that a file is
379                  * recycled before we get a chance to send it over.
380                  */
381                 nWalFiles = list_length(walFileList);
382                 walFiles = palloc(nWalFiles * sizeof(char *));
383                 i = 0;
384                 foreach(lc, walFileList)
385                 {
386                         walFiles[i++] = lfirst(lc);
387                 }
388                 qsort(walFiles, nWalFiles, sizeof(char *), compareWalFileNames);
389
390                 /*
391                  * There must be at least one xlog file in the pg_xlog directory,
392                  * since we are doing backup-including-xlog.
393                  */
394                 if (nWalFiles < 1)
395                         ereport(ERROR,
396                                         (errmsg("could not find any WAL files")));
397
398                 /*
399                  * Sanity check: the first and last segment should cover startptr and
400                  * endptr, with no gaps in between.
401                  */
402                 XLogFromFileName(walFiles[0], &tli, &segno);
403                 if (segno != startsegno)
404                 {
405                         char            startfname[MAXFNAMELEN];
406
407                         XLogFileName(startfname, ThisTimeLineID, startsegno);
408                         ereport(ERROR,
409                                         (errmsg("could not find WAL file \"%s\"", startfname)));
410                 }
411                 for (i = 0; i < nWalFiles; i++)
412                 {
413                         XLogSegNo       currsegno = segno;
414                         XLogSegNo       nextsegno = segno + 1;
415
416                         XLogFromFileName(walFiles[i], &tli, &segno);
417                         if (!(nextsegno == segno || currsegno == segno))
418                         {
419                                 char            nextfname[MAXFNAMELEN];
420
421                                 XLogFileName(nextfname, ThisTimeLineID, nextsegno);
422                                 ereport(ERROR,
423                                           (errmsg("could not find WAL file \"%s\"", nextfname)));
424                         }
425                 }
426                 if (segno != endsegno)
427                 {
428                         char            endfname[MAXFNAMELEN];
429
430                         XLogFileName(endfname, ThisTimeLineID, endsegno);
431                         ereport(ERROR,
432                                         (errmsg("could not find WAL file \"%s\"", endfname)));
433                 }
434
435                 /* Ok, we have everything we need. Send the WAL files. */
436                 for (i = 0; i < nWalFiles; i++)
437                 {
438                         FILE       *fp;
439                         char            buf[TAR_SEND_SIZE];
440                         size_t          cnt;
441                         pgoff_t         len = 0;
442
443                         snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", walFiles[i]);
444                         XLogFromFileName(walFiles[i], &tli, &segno);
445
446                         fp = AllocateFile(pathbuf, "rb");
447                         if (fp == NULL)
448                         {
449                                 /*
450                                  * Most likely reason for this is that the file was already
451                                  * removed by a checkpoint, so check for that to get a better
452                                  * error message.
453                                  */
454                                 CheckXLogRemoved(segno, tli);
455
456                                 ereport(ERROR,
457                                                 (errcode_for_file_access(),
458                                                  errmsg("could not open file \"%s\": %m", pathbuf)));
459                         }
460
461                         if (fstat(fileno(fp), &statbuf) != 0)
462                                 ereport(ERROR,
463                                                 (errcode_for_file_access(),
464                                                  errmsg("could not stat file \"%s\": %m",
465                                                                 pathbuf)));
466                         if (statbuf.st_size != XLogSegSize)
467                         {
468                                 CheckXLogRemoved(segno, tli);
469                                 ereport(ERROR,
470                                                 (errcode_for_file_access(),
471                                         errmsg("unexpected WAL file size \"%s\"", walFiles[i])));
472                         }
473
474                         /* send the WAL file itself */
475                         _tarWriteHeader(pathbuf, NULL, &statbuf);
476
477                         while ((cnt = fread(buf, 1, Min(sizeof(buf), XLogSegSize - len), fp)) > 0)
478                         {
479                                 CheckXLogRemoved(segno, tli);
480                                 /* Send the chunk as a CopyData message */
481                                 if (pq_putmessage('d', buf, cnt))
482                                         ereport(ERROR,
483                                                         (errmsg("base backup could not send data, aborting backup")));
484
485                                 len += cnt;
486                                 throttle(cnt);
487
488                                 if (len == XLogSegSize)
489                                         break;
490                         }
491
492                         if (len != XLogSegSize)
493                         {
494                                 CheckXLogRemoved(segno, tli);
495                                 ereport(ERROR,
496                                                 (errcode_for_file_access(),
497                                         errmsg("unexpected WAL file size \"%s\"", walFiles[i])));
498                         }
499
500                         /* XLogSegSize is a multiple of 512, so no need for padding */
501
502                         FreeFile(fp);
503
504                         /*
505                          * Mark file as archived, otherwise files can get archived again
506                          * after promotion of a new node. This is in line with
507                          * walreceiver.c always doing a XLogArchiveForceDone() after a
508                          * complete segment.
509                          */
510                         StatusFilePath(pathbuf, walFiles[i], ".done");
511                         sendFileWithContent(pathbuf, "");
512                 }
513
514                 /*
515                  * Send timeline history files too. Only the latest timeline history
516                  * file is required for recovery, and even that only if there happens
517                  * to be a timeline switch in the first WAL segment that contains the
518                  * checkpoint record, or if we're taking a base backup from a standby
519                  * server and the target timeline changes while the backup is taken.
520                  * But they are small and highly useful for debugging purposes, so
521                  * better include them all, always.
522                  */
523                 foreach(lc, historyFileList)
524                 {
525                         char       *fname = lfirst(lc);
526
527                         snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", fname);
528
529                         if (lstat(pathbuf, &statbuf) != 0)
530                                 ereport(ERROR,
531                                                 (errcode_for_file_access(),
532                                                  errmsg("could not stat file \"%s\": %m", pathbuf)));
533
534                         sendFile(pathbuf, pathbuf, &statbuf, false);
535
536                         /* unconditionally mark file as archived */
537                         StatusFilePath(pathbuf, fname, ".done");
538                         sendFileWithContent(pathbuf, "");
539                 }
540
541                 /* Send CopyDone message for the last tar file */
542                 pq_putemptymessage('c');
543         }
544         SendXlogRecPtrResult(endptr, endtli);
545 }
546
/*
 * qsort() comparator for WAL segment file names.
 *
 * Both arguments point at a "char *" holding a file name.  The leading 8
 * characters of each name (the timeline portion) are skipped, so only the
 * log/seg portion decides the order; names that differ in nothing but the
 * timeline compare as equal.
 */
static int
compareWalFileNames(const void *a, const void *b)
{
	const char *lhs = *(char *const *) a;
	const char *rhs = *(char *const *) b;

	return strcmp(lhs + 8, rhs + 8);
}
559
560 /*
561  * Parse the base backup options passed down by the parser
562  */
563 static void
564 parse_basebackup_options(List *options, basebackup_options *opt)
565 {
566         ListCell   *lopt;
567         bool            o_label = false;
568         bool            o_progress = false;
569         bool            o_fast = false;
570         bool            o_nowait = false;
571         bool            o_wal = false;
572         bool            o_maxrate = false;
573
574         MemSet(opt, 0, sizeof(*opt));
575         foreach(lopt, options)
576         {
577                 DefElem    *defel = (DefElem *) lfirst(lopt);
578
579                 if (strcmp(defel->defname, "label") == 0)
580                 {
581                         if (o_label)
582                                 ereport(ERROR,
583                                                 (errcode(ERRCODE_SYNTAX_ERROR),
584                                                  errmsg("duplicate option \"%s\"", defel->defname)));
585                         opt->label = strVal(defel->arg);
586                         o_label = true;
587                 }
588                 else if (strcmp(defel->defname, "progress") == 0)
589                 {
590                         if (o_progress)
591                                 ereport(ERROR,
592                                                 (errcode(ERRCODE_SYNTAX_ERROR),
593                                                  errmsg("duplicate option \"%s\"", defel->defname)));
594                         opt->progress = true;
595                         o_progress = true;
596                 }
597                 else if (strcmp(defel->defname, "fast") == 0)
598                 {
599                         if (o_fast)
600                                 ereport(ERROR,
601                                                 (errcode(ERRCODE_SYNTAX_ERROR),
602                                                  errmsg("duplicate option \"%s\"", defel->defname)));
603                         opt->fastcheckpoint = true;
604                         o_fast = true;
605                 }
606                 else if (strcmp(defel->defname, "nowait") == 0)
607                 {
608                         if (o_nowait)
609                                 ereport(ERROR,
610                                                 (errcode(ERRCODE_SYNTAX_ERROR),
611                                                  errmsg("duplicate option \"%s\"", defel->defname)));
612                         opt->nowait = true;
613                         o_nowait = true;
614                 }
615                 else if (strcmp(defel->defname, "wal") == 0)
616                 {
617                         if (o_wal)
618                                 ereport(ERROR,
619                                                 (errcode(ERRCODE_SYNTAX_ERROR),
620                                                  errmsg("duplicate option \"%s\"", defel->defname)));
621                         opt->includewal = true;
622                         o_wal = true;
623                 }
624                 else if (strcmp(defel->defname, "max_rate") == 0)
625                 {
626                         long            maxrate;
627
628                         if (o_maxrate)
629                                 ereport(ERROR,
630                                                 (errcode(ERRCODE_SYNTAX_ERROR),
631                                                  errmsg("duplicate option \"%s\"", defel->defname)));
632
633                         maxrate = intVal(defel->arg);
634                         if (maxrate < MAX_RATE_LOWER || maxrate > MAX_RATE_UPPER)
635                                 ereport(ERROR,
636                                                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
637                                                  errmsg("%d is outside the valid range for parameter \"%s\" (%d .. %d)",
638                                 (int) maxrate, "MAX_RATE", MAX_RATE_LOWER, MAX_RATE_UPPER)));
639
640                         opt->maxrate = (uint32) maxrate;
641                         o_maxrate = true;
642                 }
643                 else
644                         elog(ERROR, "option \"%s\" not recognized",
645                                  defel->defname);
646         }
647         if (opt->label == NULL)
648                 opt->label = "base backup";
649 }
650
651
652 /*
653  * SendBaseBackup() - send a complete base backup.
654  *
655  * The function will put the system into backup mode like pg_start_backup()
656  * does, so that the backup is consistent even though we read directly from
657  * the filesystem, bypassing the buffer cache.
658  */
659 void
660 SendBaseBackup(BaseBackupCmd *cmd)
661 {
662         DIR                *dir;
663         basebackup_options opt;
664
665         parse_basebackup_options(cmd->options, &opt);
666
667         WalSndSetState(WALSNDSTATE_BACKUP);
668
669         if (update_process_title)
670         {
671                 char            activitymsg[50];
672
673                 snprintf(activitymsg, sizeof(activitymsg), "sending backup \"%s\"",
674                                  opt.label);
675                 set_ps_display(activitymsg, false);
676         }
677
678         /* Make sure we can open the directory with tablespaces in it */
679         dir = AllocateDir("pg_tblspc");
680         if (!dir)
681                 ereport(ERROR,
682                                 (errmsg("could not open directory \"%s\": %m", "pg_tblspc")));
683
684         perform_base_backup(&opt, dir);
685
686         FreeDir(dir);
687 }
688
689 static void
690 send_int8_string(StringInfoData *buf, int64 intval)
691 {
692         char            is[32];
693
694         sprintf(is, INT64_FORMAT, intval);
695         pq_sendint(buf, strlen(is), 4);
696         pq_sendbytes(buf, is, strlen(is));
697 }
698
699 static void
700 SendBackupHeader(List *tablespaces)
701 {
702         StringInfoData buf;
703         ListCell   *lc;
704
705         /* Construct and send the directory information */
706         pq_beginmessage(&buf, 'T'); /* RowDescription */
707         pq_sendint(&buf, 3, 2);         /* 3 fields */
708
709         /* First field - spcoid */
710         pq_sendstring(&buf, "spcoid");
711         pq_sendint(&buf, 0, 4);         /* table oid */
712         pq_sendint(&buf, 0, 2);         /* attnum */
713         pq_sendint(&buf, OIDOID, 4);    /* type oid */
714         pq_sendint(&buf, 4, 2);         /* typlen */
715         pq_sendint(&buf, 0, 4);         /* typmod */
716         pq_sendint(&buf, 0, 2);         /* format code */
717
718         /* Second field - spcpath */
719         pq_sendstring(&buf, "spclocation");
720         pq_sendint(&buf, 0, 4);
721         pq_sendint(&buf, 0, 2);
722         pq_sendint(&buf, TEXTOID, 4);
723         pq_sendint(&buf, -1, 2);
724         pq_sendint(&buf, 0, 4);
725         pq_sendint(&buf, 0, 2);
726
727         /* Third field - size */
728         pq_sendstring(&buf, "size");
729         pq_sendint(&buf, 0, 4);
730         pq_sendint(&buf, 0, 2);
731         pq_sendint(&buf, INT8OID, 4);
732         pq_sendint(&buf, 8, 2);
733         pq_sendint(&buf, 0, 4);
734         pq_sendint(&buf, 0, 2);
735         pq_endmessage(&buf);
736
737         foreach(lc, tablespaces)
738         {
739                 tablespaceinfo *ti = lfirst(lc);
740
741                 /* Send one datarow message */
742                 pq_beginmessage(&buf, 'D');
743                 pq_sendint(&buf, 3, 2); /* number of columns */
744                 if (ti->path == NULL)
745                 {
746                         pq_sendint(&buf, -1, 4);        /* Length = -1 ==> NULL */
747                         pq_sendint(&buf, -1, 4);
748                 }
749                 else
750                 {
751                         pq_sendint(&buf, strlen(ti->oid), 4);           /* length */
752                         pq_sendbytes(&buf, ti->oid, strlen(ti->oid));
753                         pq_sendint(&buf, strlen(ti->path), 4);          /* length */
754                         pq_sendbytes(&buf, ti->path, strlen(ti->path));
755                 }
756                 if (ti->size >= 0)
757                         send_int8_string(&buf, ti->size / 1024);
758                 else
759                         pq_sendint(&buf, -1, 4);        /* NULL */
760
761                 pq_endmessage(&buf);
762         }
763
764         /* Send a CommandComplete message */
765         pq_puttextmessage('C', "SELECT");
766 }
767
768 /*
769  * Send a single resultset containing just a single
770  * XLogRecPtr record (in text format)
771  */
772 static void
773 SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli)
774 {
775         StringInfoData buf;
776         char            str[MAXFNAMELEN];
777
778         pq_beginmessage(&buf, 'T'); /* RowDescription */
779         pq_sendint(&buf, 2, 2);         /* 2 fields */
780
781         /* Field headers */
782         pq_sendstring(&buf, "recptr");
783         pq_sendint(&buf, 0, 4);         /* table oid */
784         pq_sendint(&buf, 0, 2);         /* attnum */
785         pq_sendint(&buf, TEXTOID, 4);           /* type oid */
786         pq_sendint(&buf, -1, 2);
787         pq_sendint(&buf, 0, 4);
788         pq_sendint(&buf, 0, 2);
789
790         pq_sendstring(&buf, "tli");
791         pq_sendint(&buf, 0, 4);         /* table oid */
792         pq_sendint(&buf, 0, 2);         /* attnum */
793
794         /*
795          * int8 may seem like a surprising data type for this, but in thory int4
796          * would not be wide enough for this, as TimeLineID is unsigned.
797          */
798         pq_sendint(&buf, INT8OID, 4);           /* type oid */
799         pq_sendint(&buf, -1, 2);
800         pq_sendint(&buf, 0, 4);
801         pq_sendint(&buf, 0, 2);
802         pq_endmessage(&buf);
803
804         /* Data row */
805         pq_beginmessage(&buf, 'D');
806         pq_sendint(&buf, 2, 2);         /* number of columns */
807
808         snprintf(str, sizeof(str), "%X/%X", (uint32) (ptr >> 32), (uint32) ptr);
809         pq_sendint(&buf, strlen(str), 4);       /* length */
810         pq_sendbytes(&buf, str, strlen(str));
811
812         snprintf(str, sizeof(str), "%u", tli);
813         pq_sendint(&buf, strlen(str), 4);       /* length */
814         pq_sendbytes(&buf, str, strlen(str));
815         pq_endmessage(&buf);
816
817         /* Send a CommandComplete message */
818         pq_puttextmessage('C', "SELECT");
819 }
820
/*
 * Inject a file with given name and content in the output tar stream.
 *
 * The file does not have to exist on disk: a tar header is synthesized for
 * it, followed by 'content' (a NUL-terminated string, sent without the
 * terminator) and zero padding up to the next 512-byte boundary.
 */
static void
sendFileWithContent(const char *filename, const char *content)
{
	struct stat statbuf;
	int			len = strlen(content);
	int			pad;

	/*
	 * Fabricate the stat fields the tar header is built from: owner-only
	 * read/write permissions, the current time as modification time, and the
	 * content length as file size.
	 */
	statbuf.st_size = len;
	statbuf.st_mode = S_IRUSR | S_IWUSR;
	statbuf.st_mtime = time(NULL);
#ifndef WIN32
	statbuf.st_uid = geteuid();
	statbuf.st_gid = getegid();
#else
	/* Windows doesn't have the concept of uid and gid */
	statbuf.st_uid = 0;
	statbuf.st_gid = 0;
#endif

	_tarWriteHeader(filename, NULL, &statbuf);

	/* Send the contents as a CopyData message */
	pq_putmessage('d', content, len);

	/* Pad to 512 byte boundary, per tar format requirements */
	pad = ((len + 511) & ~511) - len;
	if (pad > 0)
	{
		char		zeros[512];

		MemSet(zeros, 0, pad);
		pq_putmessage('d', zeros, pad);
	}
}
863
864 /*
865  * Include the tablespace directory pointed to by 'path' in the output tar
866  * stream.  If 'sizeonly' is true, we just calculate a total length and return
867  * it, without actually sending anything.
868  *
869  * Only used to send auxiliary tablespaces, not PGDATA.
870  */
871 static int64
872 sendTablespace(char *path, bool sizeonly)
873 {
874         int64           size;
875         char            pathbuf[MAXPGPATH];
876         struct stat statbuf;
877
878         /*
879          * 'path' points to the tablespace location, but we only want to include
880          * the version directory in it that belongs to us.
881          */
882         snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path,
883                          TABLESPACE_VERSION_DIRECTORY);
884
885         /*
886          * Store a directory entry in the tar file so we get the permissions
887          * right.
888          */
889         if (lstat(pathbuf, &statbuf) != 0)
890         {
891                 if (errno != ENOENT)
892                         ereport(ERROR,
893                                         (errcode_for_file_access(),
894                                          errmsg("could not stat file or directory \"%s\": %m",
895                                                         pathbuf)));
896
897                 /* If the tablespace went away while scanning, it's no error. */
898                 return 0;
899         }
900         if (!sizeonly)
901                 _tarWriteHeader(TABLESPACE_VERSION_DIRECTORY, NULL, &statbuf);
902         size = 512;                                     /* Size of the header just added */
903
904         /* Send all the files in the tablespace version directory */
905         size += sendDir(pathbuf, strlen(path), sizeonly, NIL);
906
907         return size;
908 }
909
910 /*
911  * Include all files from the given directory in the output tar stream. If
912  * 'sizeonly' is true, we just calculate a total length and return it, without
913  * actually sending anything.
914  *
915  * Omit any directory in the tablespaces list, to avoid backing up
916  * tablespaces twice when they were created inside PGDATA.
917  */
918 static int64
919 sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
920 {
921         DIR                *dir;
922         struct dirent *de;
923         char            pathbuf[MAXPGPATH];
924         struct stat statbuf;
925         int64           size = 0;
926
927         dir = AllocateDir(path);
928         while ((de = ReadDir(dir, path)) != NULL)
929         {
930                 /* Skip special stuff */
931                 if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
932                         continue;
933
934                 /* Skip temporary files */
935                 if (strncmp(de->d_name,
936                                         PG_TEMP_FILE_PREFIX,
937                                         strlen(PG_TEMP_FILE_PREFIX)) == 0)
938                         continue;
939
940                 /* skip auto conf temporary file */
941                 if (strncmp(de->d_name,
942                                         PG_AUTOCONF_FILENAME ".tmp",
943                                         sizeof(PG_AUTOCONF_FILENAME) + 4) == 0)
944                         continue;
945
946                 /*
947                  * If there's a backup_label file, it belongs to a backup started by
948                  * the user with pg_start_backup(). It is *not* correct for this
949                  * backup, our backup_label is injected into the tar separately.
950                  */
951                 if (strcmp(de->d_name, BACKUP_LABEL_FILE) == 0)
952                         continue;
953
954                 /*
955                  * Check if the postmaster has signaled us to exit, and abort with an
956                  * error in that case. The error handler further up will call
957                  * do_pg_abort_backup() for us. Also check that if the backup was
958                  * started while still in recovery, the server wasn't promoted.
959                  * dp_pg_stop_backup() will check that too, but it's better to stop
960                  * the backup early than continue to the end and fail there.
961                  */
962                 CHECK_FOR_INTERRUPTS();
963                 if (RecoveryInProgress() != backup_started_in_recovery)
964                         ereport(ERROR,
965                                         (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
966                                          errmsg("the standby was promoted during online backup"),
967                                  errhint("This means that the backup being taken is corrupt "
968                                                  "and should not be used. "
969                                                  "Try taking another online backup.")));
970
971                 snprintf(pathbuf, MAXPGPATH, "%s/%s", path, de->d_name);
972
973                 /* Skip postmaster.pid and postmaster.opts in the data directory */
974                 if (strcmp(pathbuf, "./postmaster.pid") == 0 ||
975                         strcmp(pathbuf, "./postmaster.opts") == 0)
976                         continue;
977
978                 /* Skip pg_control here to back up it last */
979                 if (strcmp(pathbuf, "./global/pg_control") == 0)
980                         continue;
981
982                 if (lstat(pathbuf, &statbuf) != 0)
983                 {
984                         if (errno != ENOENT)
985                                 ereport(ERROR,
986                                                 (errcode_for_file_access(),
987                                                  errmsg("could not stat file or directory \"%s\": %m",
988                                                                 pathbuf)));
989
990                         /* If the file went away while scanning, it's no error. */
991                         continue;
992                 }
993
994                 /*
995                  * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped
996                  * even when stats_temp_directory is set because PGSS_TEXT_FILE is
997                  * always created there.
998                  */
999                 if ((statrelpath != NULL && strcmp(pathbuf, statrelpath) == 0) ||
1000                   strncmp(de->d_name, PG_STAT_TMP_DIR, strlen(PG_STAT_TMP_DIR)) == 0)
1001                 {
1002                         if (!sizeonly)
1003                                 _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf);
1004                         size += 512;
1005                         continue;
1006                 }
1007
1008                 /*
1009                  * Skip pg_replslot, not useful to copy. But include it as an empty
1010                  * directory anyway, so we get permissions right.
1011                  */
1012                 if (strcmp(de->d_name, "pg_replslot") == 0)
1013                 {
1014                         if (!sizeonly)
1015                                 _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf);
1016                         size += 512;            /* Size of the header just added */
1017                         continue;
1018                 }
1019
1020                 /*
1021                  * We can skip pg_xlog, the WAL segments need to be fetched from the
1022                  * WAL archive anyway. But include it as an empty directory anyway, so
1023                  * we get permissions right.
1024                  */
1025                 if (strcmp(pathbuf, "./pg_xlog") == 0)
1026                 {
1027                         if (!sizeonly)
1028                         {
1029                                 /* If pg_xlog is a symlink, write it as a directory anyway */
1030 #ifndef WIN32
1031                                 if (S_ISLNK(statbuf.st_mode))
1032 #else
1033                                 if (pgwin32_is_junction(pathbuf))
1034 #endif
1035                                         statbuf.st_mode = S_IFDIR | S_IRWXU;
1036                                 _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf);
1037                         }
1038                         size += 512;            /* Size of the header just added */
1039
1040                         /*
1041                          * Also send archive_status directory (by hackishly reusing
1042                          * statbuf from above ...).
1043                          */
1044                         if (!sizeonly)
1045                                 _tarWriteHeader("./pg_xlog/archive_status", NULL, &statbuf);
1046                         size += 512;            /* Size of the header just added */
1047
1048                         continue;                       /* don't recurse into pg_xlog */
1049                 }
1050
1051                 /* Allow symbolic links in pg_tblspc only */
1052                 if (strcmp(path, "./pg_tblspc") == 0 &&
1053 #ifndef WIN32
1054                         S_ISLNK(statbuf.st_mode)
1055 #else
1056                         pgwin32_is_junction(pathbuf)
1057 #endif
1058                         )
1059                 {
1060 #if defined(HAVE_READLINK) || defined(WIN32)
1061                         char            linkpath[MAXPGPATH];
1062                         int                     rllen;
1063
1064                         rllen = readlink(pathbuf, linkpath, sizeof(linkpath));
1065                         if (rllen < 0)
1066                                 ereport(ERROR,
1067                                                 (errcode_for_file_access(),
1068                                                  errmsg("could not read symbolic link \"%s\": %m",
1069                                                                 pathbuf)));
1070                         if (rllen >= sizeof(linkpath))
1071                                 ereport(ERROR,
1072                                                 (errmsg("symbolic link \"%s\" target is too long",
1073                                                                 pathbuf)));
1074                         linkpath[rllen] = '\0';
1075
1076                         if (!sizeonly)
1077                                 _tarWriteHeader(pathbuf + basepathlen + 1, linkpath, &statbuf);
1078                         size += 512;            /* Size of the header just added */
1079 #else
1080
1081                         /*
1082                          * If the platform does not have symbolic links, it should not be
1083                          * possible to have tablespaces - clearly somebody else created
1084                          * them. Warn about it and ignore.
1085                          */
1086                         ereport(WARNING,
1087                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1088                                   errmsg("tablespaces are not supported on this platform")));
1089                         continue;
1090 #endif   /* HAVE_READLINK */
1091                 }
1092                 else if (S_ISDIR(statbuf.st_mode))
1093                 {
1094                         bool            skip_this_dir = false;
1095                         ListCell   *lc;
1096
1097                         /*
1098                          * Store a directory entry in the tar file so we can get the
1099                          * permissions right.
1100                          */
1101                         if (!sizeonly)
1102                                 _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf);
1103                         size += 512;            /* Size of the header just added */
1104
1105                         /*
1106                          * Call ourselves recursively for a directory, unless it happens
1107                          * to be a separate tablespace located within PGDATA.
1108                          */
1109                         foreach(lc, tablespaces)
1110                         {
1111                                 tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
1112
1113                                 /*
1114                                  * ti->rpath is the tablespace relative path within PGDATA, or
1115                                  * NULL if the tablespace has been properly located somewhere
1116                                  * else.
1117                                  *
1118                                  * Skip past the leading "./" in pathbuf when comparing.
1119                                  */
1120                                 if (ti->rpath && strcmp(ti->rpath, pathbuf + 2) == 0)
1121                                 {
1122                                         skip_this_dir = true;
1123                                         break;
1124                                 }
1125                         }
1126                         if (!skip_this_dir)
1127                                 size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces);
1128                 }
1129                 else if (S_ISREG(statbuf.st_mode))
1130                 {
1131                         bool            sent = false;
1132
1133                         if (!sizeonly)
1134                                 sent = sendFile(pathbuf, pathbuf + basepathlen + 1, &statbuf,
1135                                                                 true);
1136
1137                         if (sent || sizeonly)
1138                         {
1139                                 /* Add size, rounded up to 512byte block */
1140                                 size += ((statbuf.st_size + 511) & ~511);
1141                                 size += 512;    /* Size of the header of the file */
1142                         }
1143                 }
1144                 else
1145                         ereport(WARNING,
1146                                         (errmsg("skipping special file \"%s\"", pathbuf)));
1147         }
1148         FreeDir(dir);
1149         return size;
1150 }
1151
1152 /*****
1153  * Functions for handling tar file format
1154  *
1155  * Copied from pg_dump, but modified to work with libpq for sending
1156  */
1157
1158
/*
 * Largest file that can be stored as a single tar member.  The tar format
 * itself cannot describe more than 2^33-1 bytes (nearly 8 GB), and on top of
 * that the value must remain representable in pgoff_t, so the shift count is
 * capped at one less than pgoff_t's width in bits.
 */
#define MAX_TAR_MEMBER_FILELEN \
	((((int64) 1) << Min(33, (sizeof(pgoff_t) * 8) - 1)) - 1)
1165
1166 /*
1167  * Given the member, write the TAR header & send the file.
1168  *
1169  * If 'missing_ok' is true, will not throw an error if the file is not found.
1170  *
1171  * Returns true if the file was successfully sent, false if 'missing_ok',
1172  * and the file did not exist.
1173  */
1174 static bool
1175 sendFile(char *readfilename, char *tarfilename, struct stat * statbuf,
1176                  bool missing_ok)
1177 {
1178         FILE       *fp;
1179         char            buf[TAR_SEND_SIZE];
1180         size_t          cnt;
1181         pgoff_t         len = 0;
1182         size_t          pad;
1183
1184         fp = AllocateFile(readfilename, "rb");
1185         if (fp == NULL)
1186         {
1187                 if (errno == ENOENT && missing_ok)
1188                         return false;
1189                 ereport(ERROR,
1190                                 (errcode_for_file_access(),
1191                                  errmsg("could not open file \"%s\": %m", readfilename)));
1192         }
1193
1194         /*
1195          * Some compilers will throw a warning knowing this test can never be true
1196          * because pgoff_t can't exceed the compared maximum on their platform.
1197          */
1198         if (statbuf->st_size > MAX_TAR_MEMBER_FILELEN)
1199                 ereport(ERROR,
1200                                 (errmsg("archive member \"%s\" too large for tar format",
1201                                                 tarfilename)));
1202
1203         _tarWriteHeader(tarfilename, NULL, statbuf);
1204
1205         while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0)
1206         {
1207                 /* Send the chunk as a CopyData message */
1208                 if (pq_putmessage('d', buf, cnt))
1209                         ereport(ERROR,
1210                            (errmsg("base backup could not send data, aborting backup")));
1211
1212                 len += cnt;
1213                 throttle(cnt);
1214
1215                 if (len >= statbuf->st_size)
1216                 {
1217                         /*
1218                          * Reached end of file. The file could be longer, if it was
1219                          * extended while we were sending it, but for a base backup we can
1220                          * ignore such extended data. It will be restored from WAL.
1221                          */
1222                         break;
1223                 }
1224         }
1225
1226         /* If the file was truncated while we were sending it, pad it with zeros */
1227         if (len < statbuf->st_size)
1228         {
1229                 MemSet(buf, 0, sizeof(buf));
1230                 while (len < statbuf->st_size)
1231                 {
1232                         cnt = Min(sizeof(buf), statbuf->st_size - len);
1233                         pq_putmessage('d', buf, cnt);
1234                         len += cnt;
1235                         throttle(cnt);
1236                 }
1237         }
1238
1239         /*
1240          * Pad to 512 byte boundary, per tar format requirements. (This small
1241          * piece of data is probably not worth throttling.)
1242          */
1243         pad = ((len + 511) & ~511) - len;
1244         if (pad > 0)
1245         {
1246                 MemSet(buf, 0, pad);
1247                 pq_putmessage('d', buf, pad);
1248         }
1249
1250         FreeFile(fp);
1251
1252         return true;
1253 }
1254
1255
/*
 * Build the 512-byte tar header for member 'filename' and send it as a
 * CopyData message.  Size, mode, owner, group and modification time are
 * taken from 'statbuf'; 'linktarget' is passed through to tarCreateHeader()
 * and is NULL for members that are not symbolic links.
 */
static void
_tarWriteHeader(const char *filename, const char *linktarget,
				struct stat * statbuf)
{
	char		header[512];

	tarCreateHeader(header, filename, linktarget,
					statbuf->st_size,
					statbuf->st_mode,
					statbuf->st_uid,
					statbuf->st_gid,
					statbuf->st_mtime);

	pq_putmessage('d', header, sizeof(header));
}
1268
1269 /*
1270  * Increment the network transfer counter by the given number of bytes,
1271  * and sleep if necessary to comply with the requested network transfer
1272  * rate.
1273  */
1274 static void
1275 throttle(size_t increment)
1276 {
1277         int64           elapsed,
1278                                 elapsed_min,
1279                                 sleep;
1280         int                     wait_result;
1281
1282         if (throttling_counter < 0)
1283                 return;
1284
1285         throttling_counter += increment;
1286         if (throttling_counter < throttling_sample)
1287                 return;
1288
1289         /* Time elapsed since the last measurement (and possible wake up). */
1290         elapsed = GetCurrentIntegerTimestamp() - throttled_last;
1291         /* How much should have elapsed at minimum? */
1292         elapsed_min = elapsed_min_unit * (throttling_counter / throttling_sample);
1293         sleep = elapsed_min - elapsed;
1294         /* Only sleep if the transfer is faster than it should be. */
1295         if (sleep > 0)
1296         {
1297                 ResetLatch(&MyWalSnd->latch);
1298
1299                 /*
1300                  * (TAR_SEND_SIZE / throttling_sample * elapsed_min_unit) should be
1301                  * the maximum time to sleep. Thus the cast to long is safe.
1302                  */
1303                 wait_result = WaitLatch(&MyWalSnd->latch,
1304                                                          WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
1305                                                                 (long) (sleep / 1000));
1306         }
1307         else
1308         {
1309                 /*
1310                  * The actual transfer rate is below the limit.  A negative value
1311                  * would distort the adjustment of throttled_last.
1312                  */
1313                 wait_result = 0;
1314                 sleep = 0;
1315         }
1316
1317         /*
1318          * Only a whole multiple of throttling_sample was processed. The rest will
1319          * be done during the next call of this function.
1320          */
1321         throttling_counter %= throttling_sample;
1322
1323         /* Once the (possible) sleep has ended, new period starts. */
1324         if (wait_result & WL_TIMEOUT)
1325                 throttled_last += elapsed + sleep;
1326         else if (sleep > 0)
1327                 /* Sleep was necessary but might have been interrupted. */
1328                 throttled_last = GetCurrentIntegerTimestamp();
1329 }