From 72d422a5227ef6f76f412486a395aba9f53bf3f0 Mon Sep 17 00:00:00 2001 From: Andrew Dunstan Date: Tue, 12 May 2015 09:29:10 -0400 Subject: [PATCH] Map basebackup tablespaces using a tablespace_map file Windows can't reliably restore symbolic links from a tar format, so instead during backup start we create a tablespace_map file, which is used by the restoring postgres to create the correct links in pg_tblspc. The backup protocol also now has an option to request this file to be included in the backup stream, and this is used by pg_basebackup when operating in tar mode. This is done on all platforms, not just Windows. This means that pg_basebackup will not work in tar mode against 9.4 and older servers, as this protocol option isn't implemented there. Amit Kapila, reviewed by Dilip Kumar, with a little editing from me. --- doc/src/sgml/backup.sgml | 32 +- doc/src/sgml/func.sgml | 14 +- doc/src/sgml/protocol.sgml | 15 +- doc/src/sgml/ref/pg_basebackup.sgml | 14 +- src/backend/access/transam/xlog.c | 387 +++++++++++++++++++++++-- src/backend/access/transam/xlogfuncs.c | 12 +- src/backend/replication/basebackup.c | 138 ++++----- src/backend/replication/repl_gram.y | 16 +- src/backend/replication/repl_scanner.l | 1 + src/bin/pg_basebackup/pg_basebackup.c | 5 +- src/include/access/xlog.h | 9 +- src/include/replication/basebackup.h | 10 + 12 files changed, 519 insertions(+), 134 deletions(-) diff --git a/doc/src/sgml/backup.sgml b/doc/src/sgml/backup.sgml index e25e0d0edf..def43a21da 100644 --- a/doc/src/sgml/backup.sgml +++ b/doc/src/sgml/backup.sgml @@ -836,8 +836,11 @@ SELECT pg_start_backup('label'); pg_start_backup creates a backup label file, called backup_label, in the cluster directory with information about your backup, including the start time and label - string. The file is critical to the integrity of the backup, should - you need to restore from it. + string. 
The function also creates a tablespace map file, + called tablespace_map, in the cluster directory with + information about tablespace symbolic links in pg_tblspc/ + if one or more such links are present. Both files are critical to the + integrity of the backup, should you need to restore from it. @@ -965,17 +968,20 @@ SELECT pg_stop_backup(); It's also worth noting that the pg_start_backup function - makes a file named backup_label in the database cluster - directory, which is removed by pg_stop_backup. - This file will of course be archived as a part of your backup dump file. - The backup label file includes the label string you gave to - pg_start_backup, as well as the time at which - pg_start_backup was run, and the name of the starting WAL - file. In case of confusion it is therefore possible to look inside a - backup dump file and determine exactly which backup session the dump file - came from. However, this file is not merely for your information; its - presence and contents are critical to the proper operation of the system's - recovery process. + makes files named backup_label and + tablespace_map in the database cluster directory, + which are removed by pg_stop_backup. These files will of + course be archived as a part of your backup dump file. The backup label + file includes the label string you gave to pg_start_backup, + as well as the time at which pg_start_backup was run, and + the name of the starting WAL file. In case of confusion it is therefore + possible to look inside a backup dump file and determine exactly which + backup session the dump file came from. The tablespace map file includes + the symbolic link names as they exist in the directory + pg_tblspc/ and the full path of each symbolic link. + These files are not merely for your information; their presence and + contents are critical to the proper operation of the system's recovery + process. 
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 1ee4f634d3..bf8d72e9ff 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -16591,11 +16591,12 @@ SELECT set_config('log_statement_stats', 'off', false); pg_start_backup accepts an arbitrary user-defined label for the backup. (Typically this would be the name under which the backup dump file will be stored.) The function - writes a backup label file (backup_label) into the - database cluster's data directory, performs a checkpoint, - and then returns the backup's starting transaction log location as text. - The user can ignore this result value, but it is - provided in case it is useful. + writes a backup label file (backup_label) and, if there + are any links in the pg_tblspc/ directory, a tablespace map + file (tablespace_map) into the database cluster's data + directory, performs a checkpoint, and then returns the backup's starting + transaction log location as text. The user can ignore this result value, + but it is provided in case it is useful. postgres=# select pg_start_backup('label_goes_here'); pg_start_backup @@ -16610,7 +16611,8 @@ postgres=# select pg_start_backup('label_goes_here'); - pg_stop_backup removes the label file created by + pg_stop_backup removes the label file and, if it exists, + the tablespace_map file created by pg_start_backup, and creates a backup history file in the transaction log archive area. 
The history file includes the label given to pg_start_backup, the starting and ending transaction log locations for diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index ac13d3201c..d985204566 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -1882,7 +1882,7 @@ The commands accepted in walsender mode are: - BASE_BACKUP [LABEL 'label'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE rate] + BASE_BACKUP [LABEL 'label'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE rate] [TABLESPACE_MAP] BASE_BACKUP @@ -1968,6 +1968,19 @@ The commands accepted in walsender mode are: + + + TABLESPACE_MAP + + + Include information about symbolic links present in the directory + pg_tblspc in a file named + tablespace_map. The tablespace map file includes + each symbolic link name as it exists in the directory + pg_tblspc/ and the full path of that symbolic link. + + + diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml index 642fccf325..07d3a5a1dc 100644 --- a/doc/src/sgml/ref/pg_basebackup.sgml +++ b/doc/src/sgml/ref/pg_basebackup.sgml @@ -587,11 +587,23 @@ PostgreSQL documentation tablespaces. + + When tar format mode is used, it is the user's responsibility to unpack each + tar file before starting postgres. If there are additional tablespaces, the + tar files for them need to be unpacked in the correct locations. In this + case the symbolic links for those tablespaces will be created by Postgres + according to the contents of the tablespace_map file that is + included in the base.tar file. + + pg_basebackup works with servers of the same or an older major version, down to 9.1. However, WAL streaming mode (-X - stream) only works with server version 9.3 and later. + stream) only works with server version 9.3 and later, and tar format mode + (--format=tar) of the current version only works with server version 9.5 + or later. 
+ diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 6f7e3bd96b..5f0551a3cb 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -42,6 +42,7 @@ #include "pgstat.h" #include "postmaster/bgwriter.h" #include "postmaster/startup.h" +#include "replication/basebackup.h" #include "replication/logical.h" #include "replication/slot.h" #include "replication/origin.h" @@ -824,6 +825,8 @@ static void xlog_outdesc(StringInfo buf, XLogReaderState *record); static void pg_start_backup_callback(int code, Datum arg); static bool read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired, bool *backupFromStandby); +static bool read_tablespace_map(List **tablespaces); + static void rm_redo_error_callback(void *arg); static int get_sync_bit(int method); @@ -5917,6 +5920,7 @@ StartupXLOG(void) bool wasShutdown; bool reachedStopPoint = false; bool haveBackupLabel = false; + bool haveTblspcMap = false; XLogRecPtr RecPtr, checkPointLoc, EndOfLog; @@ -6001,16 +6005,6 @@ StartupXLOG(void) */ ValidateXLOGDirectoryStructure(); - /* - * Clear out any old relcache cache files. This is *necessary* if we do - * any WAL replay, since that would probably result in the cache files - * being out of sync with database reality. In theory we could leave them - * in place if the database had been cleanly shut down, but it seems - * safest to just remove them always and let them be rebuilt during the - * first backend startup. - */ - RelationCacheInitFileRemove(); - /* * Initialize on the assumption we want to recover to the latest timeline * that's active according to pg_control. @@ -6080,6 +6074,8 @@ StartupXLOG(void) if (read_backup_label(&checkPointLoc, &backupEndRequired, &backupFromStandby)) { + List *tablespaces = NIL; + /* * Archive recovery was requested, and thanks to the backup label * file, we know how far we need to replay to reach consistency. 
Enter @@ -6124,6 +6120,59 @@ StartupXLOG(void) errhint("If you are not restoring from a backup, try removing the file \"%s/backup_label\".", DataDir))); wasShutdown = false; /* keep compiler quiet */ } + + /* read the tablespace_map file if present and create symlinks. */ + if (read_tablespace_map(&tablespaces)) + { + ListCell *lc; + struct stat st; + + foreach(lc, tablespaces) + { + tablespaceinfo *ti = lfirst(lc); + char *linkloc; + + linkloc = psprintf("pg_tblspc/%s", ti->oid); + + /* + * Remove the existing symlink if any and Create the symlink + * under PGDATA. We need to use rmtree instead of rmdir as + * the link location might contain directories or files + * corresponding to the actual path. Some tar utilities do + * things that way while extracting symlinks. + */ + if (lstat(linkloc, &st) == 0 && S_ISDIR(st.st_mode)) + { + if (!rmtree(linkloc,true)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not remove directory \"%s\": %m", + linkloc))); + } + else + { + if (unlink(linkloc) < 0 && errno != ENOENT) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not remove symbolic link \"%s\": %m", + linkloc))); + } + + if (symlink(ti->path, linkloc) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create symbolic link \"%s\": %m", + linkloc))); + + pfree(ti->oid); + pfree(ti->path); + pfree(ti); + } + + /* set flag to delete it later */ + haveTblspcMap = true; + } + /* set flag to delete it later */ haveBackupLabel = true; } @@ -6197,6 +6246,20 @@ StartupXLOG(void) wasShutdown = (record->xl_info == XLOG_CHECKPOINT_SHUTDOWN); } + /* + * Clear out any old relcache cache files. This is *necessary* if we do + * any WAL replay, since that would probably result in the cache files + * being out of sync with database reality. 
In theory we could leave them + * in place if the database had been cleanly shut down, but it seems + * safest to just remove them always and let them be rebuilt during the + * first backend startup. These files need to be removed from all + * directories including pg_tblspc, however the symlinks are created + * only after reading tablespace_map file in case of archive recovery + * from backup, so we need to clear old relcache files here after creating + * symlinks. + */ + RelationCacheInitFileRemove(); + /* * If the location of the checkpoint record is not on the expected * timeline in the history of the requested timeline, we cannot proceed: @@ -6466,6 +6529,23 @@ StartupXLOG(void) BACKUP_LABEL_FILE, BACKUP_LABEL_OLD))); } + /* + * If there was a tablespace_map file, it's done its job and the + * symlinks have been created. We must get rid of the map file + * so that if we crash during recovery, we don't create symlinks + * again. It seems prudent though to just rename the file out of + * the way rather than delete it completely. + */ + if (haveTblspcMap) + { + unlink(TABLESPACE_MAP_OLD); + if (rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD) != 0) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not rename file \"%s\" to \"%s\": %m", + TABLESPACE_MAP, TABLESPACE_MAP_OLD))); + } + /* Check that the GUCs used to generate the WAL allow recovery */ CheckRequiredParameterValues(); @@ -9610,16 +9690,27 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno) * * There are two kind of backups: exclusive and non-exclusive. An exclusive * backup is started with pg_start_backup(), and there can be only one active - * at a time. The backup label file of an exclusive backup is written to - * $PGDATA/backup_label, and it is removed by pg_stop_backup(). + * at a time. The backup and tablespace map files of an exclusive backup are + * written to $PGDATA/backup_label and $PGDATA/tablespace_map, and they are + * removed by pg_stop_backup(). 
* * A non-exclusive backup is used for the streaming base backups (see * src/backend/replication/basebackup.c). The difference to exclusive backups - * is that the backup label file is not written to disk. Instead, its would-be - * contents are returned in *labelfile, and the caller is responsible for - * including it in the backup archive as 'backup_label'. There can be many - * non-exclusive backups active at the same time, and they don't conflict - * with an exclusive backup either. + * is that the backup label and tablespace map files are not written to disk. + * Instead, their would-be contents are returned in *labelfile and *tblspcmapfile, + * and the caller is responsible for including them in the backup archive as + * 'backup_label' and 'tablespace_map'. There can be many non-exclusive backups + * active at the same time, and they don't conflict with an exclusive backup + * either. + * + * tblspcmapfile is required mainly for tar format in Windows as native Windows + * utilities are not able to create symlinks while extracting files from tar. + * However for consistency, the same is used for all platforms. + * + * needtblspcmapfile is true for the cases (exclusive backup and for + * non-exclusive backup only when tar format is used for taking backup) + * when backup needs to generate tablespace_map file, it is used to + * embed escape character before newline character in tablespace path. * * Returns the minimum WAL position that must be present to restore from this * backup, and the corresponding timeline ID in *starttli_p. 
@@ -9632,7 +9723,9 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno) */ XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, - char **labelfile) + char **labelfile, DIR *tblspcdir, List **tablespaces, + char **tblspcmapfile, bool infotbssize, + bool needtblspcmapfile) { bool exclusive = (labelfile == NULL); bool backup_started_in_recovery = false; @@ -9646,6 +9739,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, struct stat stat_buf; FILE *fp; StringInfoData labelfbuf; + StringInfoData tblspc_mapfbuf; backup_started_in_recovery = RecoveryInProgress(); @@ -9717,6 +9811,9 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, PG_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive)); { bool gotUniqueStartpoint = false; + struct dirent *de; + tablespaceinfo *ti; + int datadirpathlen; /* * Force an XLOG file switch before the checkpoint, to ensure that the @@ -9836,6 +9933,98 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, XLByteToSeg(startpoint, _logSegNo); XLogFileName(xlogfilename, ThisTimeLineID, _logSegNo); + /* + * Construct tablespace_map file + */ + initStringInfo(&tblspc_mapfbuf); + + datadirpathlen = strlen(DataDir); + + /* Collect information about all tablespaces */ + while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL) + { + char fullpath[MAXPGPATH]; + char linkpath[MAXPGPATH]; + char *relpath = NULL; + int rllen; + StringInfoData buflinkpath; + char *s = linkpath; + + /* Skip special stuff */ + if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) + continue; + + snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name); + +#if defined(HAVE_READLINK) || defined(WIN32) + rllen = readlink(fullpath, linkpath, sizeof(linkpath)); + if (rllen < 0) + { + ereport(WARNING, + (errmsg("could not read symbolic link \"%s\": %m", + fullpath))); + continue; + } + else if (rllen >= 
sizeof(linkpath)) + { + ereport(WARNING, + (errmsg("symbolic link \"%s\" target is too long", + fullpath))); + continue; + } + linkpath[rllen] = '\0'; + + /* + * Add the escape character '\\' before newline in a string + * to ensure that we can distinguish between the newline in + * the tablespace path and end of line while reading + * tablespace_map file during archive recovery. + */ + initStringInfo(&buflinkpath); + + while (*s) + { + if ((*s == '\n' || *s == '\r') && needtblspcmapfile) + appendStringInfoChar(&buflinkpath, '\\'); + appendStringInfoChar(&buflinkpath, *s++); + } + + + /* + * Relpath holds the relative path of the tablespace directory + * when it's located within PGDATA, or NULL if it's located + * elsewhere. + */ + if (rllen > datadirpathlen && + strncmp(linkpath, DataDir, datadirpathlen) == 0 && + IS_DIR_SEP(linkpath[datadirpathlen])) + relpath = linkpath + datadirpathlen + 1; + + ti = palloc(sizeof(tablespaceinfo)); + ti->oid = pstrdup(de->d_name); + ti->path = pstrdup(buflinkpath.data); + ti->rpath = relpath ? pstrdup(relpath) : NULL; + ti->size = infotbssize ? sendTablespace(fullpath, true) : -1; + + if(tablespaces) + *tablespaces = lappend(*tablespaces, ti); + + appendStringInfo(&tblspc_mapfbuf, "%s %s\n", ti->oid, ti->path); + + pfree(buflinkpath.data); +#else + + /* + * If the platform does not have symbolic links, it should not be + * possible to have tablespaces - clearly somebody else created + * them. Warn about it and ignore. + */ + ereport(WARNING, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("tablespaces are not supported on this platform"))); +#endif + } + /* * Construct backup label file */ @@ -9899,9 +10088,51 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, errmsg("could not write file \"%s\": %m", BACKUP_LABEL_FILE))); pfree(labelfbuf.data); + + /* Write backup tablespace_map file. 
*/ + if (tblspc_mapfbuf.len > 0) + { + if (stat(TABLESPACE_MAP, &stat_buf) != 0) + { + if (errno != ENOENT) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", + TABLESPACE_MAP))); + } + else + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("a backup is already in progress"), + errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.", + TABLESPACE_MAP))); + + fp = AllocateFile(TABLESPACE_MAP, "w"); + + if (!fp) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create file \"%s\": %m", + TABLESPACE_MAP))); + if (fwrite(tblspc_mapfbuf.data, tblspc_mapfbuf.len, 1, fp) != 1 || + fflush(fp) != 0 || + pg_fsync(fileno(fp)) != 0 || + ferror(fp) || + FreeFile(fp)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write file \"%s\": %m", + TABLESPACE_MAP))); + } + + pfree(tblspc_mapfbuf.data); } else + { *labelfile = labelfbuf.data; + if (tblspc_mapfbuf.len > 0) + *tblspcmapfile = tblspc_mapfbuf.data; + } } PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive)); @@ -10072,6 +10303,12 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p) (errcode_for_file_access(), errmsg("could not remove file \"%s\": %m", BACKUP_LABEL_FILE))); + + /* + * Remove tablespace_map file if present, it is created + * only if there are tablespaces. + */ + unlink(TABLESPACE_MAP); } /* @@ -10471,6 +10708,86 @@ read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired, return true; } +/* + * read_tablespace_map: check to see if a tablespace_map file is present + * + * If we see a tablespace_map file during recovery, we assume that we are + * recovering from a backup dump file, and we therefore need to create symlinks + * as per the information present in tablespace_map file. 
+ * + * Returns TRUE if a tablespace_map file was found (and fills the link + * information for all the tablespace links present in file); returns FALSE + * if not. + */ +static bool +read_tablespace_map(List **tablespaces) +{ + tablespaceinfo *ti; + FILE *lfp; + char tbsoid[MAXPGPATH]; + char *tbslinkpath; + char str[MAXPGPATH]; + int ch, prev_ch = -1, + i = 0, n; + + /* + * See if tablespace_map file is present + */ + lfp = AllocateFile(TABLESPACE_MAP, "r"); + if (!lfp) + { + if (errno != ENOENT) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", + TABLESPACE_MAP))); + return false; /* it's not there, all is fine */ + } + + /* + * Read and parse the link name and path lines from tablespace_map file + * (this code is pretty crude, but we are not expecting any variability + * in the file format). While taking backup we embed escape character + * '\\' before newline in tablespace path, so that during reading of + * tablespace_map file, we could distinguish newline in tablespace path + * and end of line. Now while reading tablespace_map file, remove the + * escape character that has been added in tablespace path during backup. 
+ */ + while ((ch = fgetc(lfp)) != EOF) + { + if ((ch == '\n' || ch == '\r') && prev_ch != '\\') + { + str[i] = '\0'; + if (sscanf(str, "%s %n", tbsoid, &n) != 1) + ereport(FATAL, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("invalid data in file \"%s\"", TABLESPACE_MAP))); + tbslinkpath = str + n; + i = 0; + + ti = palloc(sizeof(tablespaceinfo)); + ti->oid = pstrdup(tbsoid); + ti->path = pstrdup(tbslinkpath); + + *tablespaces = lappend(*tablespaces, ti); + continue; + } + else if ((ch == '\n' || ch == '\r') && prev_ch == '\\') + str[i-1] = ch; + else + str[i++] = ch; + prev_ch = ch; + } + + if (ferror(lfp) || FreeFile(lfp)) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", + TABLESPACE_MAP))); + + return true; +} + /* * Error context callback for errors occurring during rm_redo(). */ @@ -10502,11 +10819,16 @@ BackupInProgress(void) } /* - * CancelBackup: rename the "backup_label" file to cancel backup mode + * CancelBackup: rename the "backup_label" and "tablespace_map" + * files to cancel backup mode * * If the "backup_label" file exists, it will be renamed to "backup_label.old". - * Note that this will render an online backup in progress useless. - * To correctly finish an online backup, pg_stop_backup must be called. + * Similarly, if the "tablespace_map" file exists, it will be renamed to + * "tablespace_map.old". + * + * Note that this will render an online backup in progress + * useless. To correctly finish an online backup, pg_stop_backup must be + * called. 
*/ void CancelBackup(void) @@ -10535,6 +10857,29 @@ CancelBackup(void) errdetail("Could not rename \"%s\" to \"%s\": %m.", BACKUP_LABEL_FILE, BACKUP_LABEL_OLD))); } + + /* if the tablespace_map file is not there, return */ + if (stat(TABLESPACE_MAP, &stat_buf) < 0) + return; + + /* remove leftover file from previously canceled backup if it exists */ + unlink(TABLESPACE_MAP_OLD); + + if (rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD) == 0) + { + ereport(LOG, + (errmsg("online backup mode canceled"), + errdetail("\"%s\" was renamed to \"%s\".", + TABLESPACE_MAP, TABLESPACE_MAP_OLD))); + } + else + { + ereport(WARNING, + (errcode_for_file_access(), + errmsg("online backup mode was not canceled"), + errdetail("Could not rename \"%s\" to \"%s\": %m.", + TABLESPACE_MAP, TABLESPACE_MAP_OLD))); + } } /* diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c index 2179bf719e..329bb8ca25 100644 --- a/src/backend/access/transam/xlogfuncs.c +++ b/src/backend/access/transam/xlogfuncs.c @@ -51,6 +51,7 @@ pg_start_backup(PG_FUNCTION_ARGS) bool fast = PG_GETARG_BOOL(1); char *backupidstr; XLogRecPtr startpoint; + DIR *dir; backupidstr = text_to_cstring(backupid); @@ -59,7 +60,16 @@ pg_start_backup(PG_FUNCTION_ARGS) (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser or replication role to run a backup"))); - startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL); + /* Make sure we can open the directory with tablespaces in it */ + dir = AllocateDir("pg_tblspc"); + if (!dir) + ereport(ERROR, + (errmsg("could not open directory \"%s\": %m", "pg_tblspc"))); + + startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL, + dir, NULL, NULL, false, true); + + FreeDir(dir); PG_RETURN_LSN(startpoint); } diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index de103c6f5b..b341ff64d9 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -46,11 +46,12 
@@ typedef struct bool nowait; bool includewal; uint32 maxrate; + bool sendtblspcmapfile; } basebackup_options; -static int64 sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces); -static int64 sendTablespace(char *path, bool sizeonly); +static int64 sendDir(char *path, int basepathlen, bool sizeonly, + List *tablespaces, bool sendtblspclinks); static bool sendFile(char *readfilename, char *tarfilename, struct stat * statbuf, bool missing_ok); static void sendFileWithContent(const char *filename, const char *content); @@ -93,15 +94,6 @@ static int64 elapsed_min_unit; /* The last check of the transfer rate. */ static int64 throttled_last; -typedef struct -{ - char *oid; - char *path; - char *rpath; /* relative path within PGDATA, or NULL */ - int64 size; -} tablespaceinfo; - - /* * Called when ERROR or FATAL happens in perform_base_backup() after * we have started the backup - make sure we end it! @@ -126,14 +118,18 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir) XLogRecPtr endptr; TimeLineID endtli; char *labelfile; + char *tblspc_map_file = NULL; int datadirpathlen; + List *tablespaces = NIL; datadirpathlen = strlen(DataDir); backup_started_in_recovery = RecoveryInProgress(); startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli, - &labelfile); + &labelfile, tblspcdir, &tablespaces, + &tblspc_map_file, + opt->progress, opt->sendtblspcmapfile); /* * Once do_pg_start_backup has been called, ensure that any failure causes * us to abort the backup so we don't "leak" a backup counter. 
For this reason, @@ -143,9 +139,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir) PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0); { - List *tablespaces = NIL; ListCell *lc; - struct dirent *de; tablespaceinfo *ti; SendXlogRecPtrResult(startptr, starttli); @@ -162,70 +156,9 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir) else statrelpath = pgstat_stat_directory; - /* Collect information about all tablespaces */ - while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL) - { - char fullpath[MAXPGPATH]; - char linkpath[MAXPGPATH]; - char *relpath = NULL; - int rllen; - - /* Skip special stuff */ - if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) - continue; - - snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name); - -#if defined(HAVE_READLINK) || defined(WIN32) - rllen = readlink(fullpath, linkpath, sizeof(linkpath)); - if (rllen < 0) - { - ereport(WARNING, - (errmsg("could not read symbolic link \"%s\": %m", - fullpath))); - continue; - } - else if (rllen >= sizeof(linkpath)) - { - ereport(WARNING, - (errmsg("symbolic link \"%s\" target is too long", - fullpath))); - continue; - } - linkpath[rllen] = '\0'; - - /* - * Relpath holds the relative path of the tablespace directory - * when it's located within PGDATA, or NULL if it's located - * elsewhere. - */ - if (rllen > datadirpathlen && - strncmp(linkpath, DataDir, datadirpathlen) == 0 && - IS_DIR_SEP(linkpath[datadirpathlen])) - relpath = linkpath + datadirpathlen + 1; - - ti = palloc(sizeof(tablespaceinfo)); - ti->oid = pstrdup(de->d_name); - ti->path = pstrdup(linkpath); - ti->rpath = relpath ? pstrdup(relpath) : NULL; - ti->size = opt->progress ? sendTablespace(fullpath, true) : -1; - tablespaces = lappend(tablespaces, ti); -#else - - /* - * If the platform does not have symbolic links, it should not be - * possible to have tablespaces - clearly somebody else created - * them. Warn about it and ignore. 
- */ - ereport(WARNING, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("tablespaces are not supported on this platform"))); -#endif - } - /* Add a node for the base directory at the end */ ti = palloc0(sizeof(tablespaceinfo)); - ti->size = opt->progress ? sendDir(".", 1, true, tablespaces) : -1; + ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1; tablespaces = lappend(tablespaces, ti); /* Send tablespace header */ @@ -274,8 +207,17 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir) /* In the main tar, include the backup_label first... */ sendFileWithContent(BACKUP_LABEL_FILE, labelfile); - /* ... then the bulk of the files ... */ - sendDir(".", 1, false, tablespaces); + /* + * Send tablespace_map file if required and then the bulk of + * the files. + */ + if (tblspc_map_file && opt->sendtblspcmapfile) + { + sendFileWithContent(TABLESPACE_MAP, tblspc_map_file); + sendDir(".", 1, false, tablespaces, false); + } + else + sendDir(".", 1, false, tablespaces, true); /* ... and pg_control after everything else. */ if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0) @@ -567,6 +509,7 @@ parse_basebackup_options(List *options, basebackup_options *opt) bool o_nowait = false; bool o_wal = false; bool o_maxrate = false; + bool o_tablespace_map = false; MemSet(opt, 0, sizeof(*opt)); foreach(lopt, options) @@ -637,6 +580,15 @@ parse_basebackup_options(List *options, basebackup_options *opt) opt->maxrate = (uint32) maxrate; o_maxrate = true; } + else if (strcmp(defel->defname, "tablespace_map") == 0) + { + if (o_tablespace_map) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("duplicate option \"%s\"", defel->defname))); + opt->sendtblspcmapfile = true; + o_tablespace_map = true; + } else elog(ERROR, "option \"%s\" not recognized", defel->defname); @@ -865,7 +817,7 @@ sendFileWithContent(const char *filename, const char *content) * * Only used to send auxiliary tablespaces, not PGDATA. 
*/ -static int64 +int64 sendTablespace(char *path, bool sizeonly) { int64 size; @@ -899,7 +851,7 @@ sendTablespace(char *path, bool sizeonly) size = 512; /* Size of the header just added */ /* Send all the files in the tablespace version directory */ - size += sendDir(pathbuf, strlen(path), sizeonly, NIL); + size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true); return size; } @@ -911,9 +863,14 @@ sendTablespace(char *path, bool sizeonly) * * Omit any directory in the tablespaces list, to avoid backing up * tablespaces twice when they were created inside PGDATA. + * + * If sendtblspclinks is true, we need to include symlink + * information in the tar file. If not, we can skip that + * as it will be sent separately in the tablespace_map file. */ static int64 -sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces) +sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces, + bool sendtblspclinks) { DIR *dir; struct dirent *de; @@ -941,13 +898,17 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces) continue; /* - * If there's a backup_label file, it belongs to a backup started by - * the user with pg_start_backup(). It is *not* correct for this - * backup, our backup_label is injected into the tar separately. + * If there's a backup_label or tablespace_map file, it belongs to a + * backup started by the user with pg_start_backup(). It is *not* + * correct for this backup, our backup_label/tablespace_map is injected + * into the tar separately. */ if (strcmp(de->d_name, BACKUP_LABEL_FILE) == 0) continue; + if (strcmp(de->d_name, TABLESPACE_MAP) == 0) + continue; + /* * Check if the postmaster has signaled us to exit, and abort with an * error in that case. The error handler further up will call @@ -1120,8 +1081,15 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces) break; } } + + /* + * skip sending directories inside pg_tblspc, if not required. 
+ */ + if (strcmp(pathbuf, "./pg_tblspc") == 0 && !sendtblspclinks) + skip_this_dir = true; + if (!skip_this_dir) - size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces); + size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, sendtblspclinks); } else if (S_ISREG(statbuf.st_mode)) { diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y index 7d6d154f9c..e9177ca0db 100644 --- a/src/backend/replication/repl_gram.y +++ b/src/backend/replication/repl_gram.y @@ -71,13 +71,16 @@ Node *replication_parse_result; %token K_NOWAIT %token K_MAX_RATE %token K_WAL +%token K_TABLESPACE_MAP %token K_TIMELINE %token K_PHYSICAL %token K_LOGICAL %token K_SLOT %type command -%type base_backup start_replication start_logical_replication create_replication_slot drop_replication_slot identify_system timeline_history +%type base_backup start_replication start_logical_replication + create_replication_slot drop_replication_slot identify_system + timeline_history %type base_backup_opt_list %type base_backup_opt %type opt_timeline @@ -119,12 +122,14 @@ identify_system: ; /* - * BASE_BACKUP [LABEL '