]> granicus.if.org Git - postgresql/commitdiff
Map basebackup tablespaces using a tablespace_map file
authorAndrew Dunstan <andrew@dunslane.net>
Tue, 12 May 2015 13:29:10 +0000 (09:29 -0400)
committerAndrew Dunstan <andrew@dunslane.net>
Tue, 12 May 2015 13:29:10 +0000 (09:29 -0400)
Windows can't reliably restore symbolic links from a tar format, so
instead during backup start we create a tablespace_map file, which is
used by the restoring postgres to create the correct links in pg_tblspc.
The backup protocol also now has an option to request this file to be
included in the backup stream, and this is used by pg_basebackup when
operating in tar mode.

This is done on all platforms, not just Windows.

This means that pg_basebackup will not work in tar mode against 9.4
and older servers, as this protocol option isn't implemented there.

Amit Kapila, reviewed by Dilip Kumar, with a little editing from me.

12 files changed:
doc/src/sgml/backup.sgml
doc/src/sgml/func.sgml
doc/src/sgml/protocol.sgml
doc/src/sgml/ref/pg_basebackup.sgml
src/backend/access/transam/xlog.c
src/backend/access/transam/xlogfuncs.c
src/backend/replication/basebackup.c
src/backend/replication/repl_gram.y
src/backend/replication/repl_scanner.l
src/bin/pg_basebackup/pg_basebackup.c
src/include/access/xlog.h
src/include/replication/basebackup.h

index e25e0d0edf76bc5e0815adb9efc4182e1c08cec7..def43a21da2439c0746d16c42aab89cbd80121c5 100644 (file)
@@ -836,8 +836,11 @@ SELECT pg_start_backup('label');
      <function>pg_start_backup</> creates a <firstterm>backup label</> file,
      called <filename>backup_label</>, in the cluster directory with
      information about your backup, including the start time and label
-     string.  The file is critical to the integrity of the backup, should
-     you need to restore from it.
+     string.  The function also creates a <firstterm>tablespace map</> file,
+     called <filename>tablespace_map</>, in the cluster directory with
+     information about tablespace symbolic links in <filename>pg_tblspc/</>
+     if one or more such links are present.  Both files are critical to the
+     integrity of the backup, should you need to restore from it.
     </para>
 
     <para>
@@ -965,17 +968,20 @@ SELECT pg_stop_backup();
 
    <para>
     It's also worth noting that the <function>pg_start_backup</> function
-    makes a file named <filename>backup_label</> in the database cluster
-    directory, which is removed by <function>pg_stop_backup</>.
-    This file will of course be archived as a part of your backup dump file.
-    The backup label file includes the label string you gave to
-    <function>pg_start_backup</>, as well as the time at which
-    <function>pg_start_backup</> was run, and the name of the starting WAL
-    file.  In case of confusion it is therefore possible to look inside a
-    backup dump file and determine exactly which backup session the dump file
-    came from.  However, this file is not merely for your information; its
-    presence and contents are critical to the proper operation of the system's
-    recovery process.
+    makes files named <filename>backup_label</> and
+    <filename>tablespace_map</> in the database cluster directory,
+    which are removed by <function>pg_stop_backup</>.  These files will of
+    course be archived as a part of your backup dump file.  The backup label
+    file includes the label string you gave to <function>pg_start_backup</>,
+    as well as the time at which <function>pg_start_backup</> was run, and
+    the name of the starting WAL file.  In case of confusion it is therefore
+    possible to look inside a backup dump file and determine exactly which
+    backup session the dump file came from.  The tablespace map file includes
+    the symbolic link names as they exist in the directory
+    <filename>pg_tblspc/</> and the full path of each symbolic link.
+    These files are not merely for your information; their presence and
+    contents are critical to the proper operation of the system's recovery
+    process.
    </para>
 
    <para>
index 1ee4f634d3a36faa94ac5b61dc0dba4481b67f64..bf8d72e9ff9f1d2e027e2823b416d525247729bd 100644 (file)
@@ -16591,11 +16591,12 @@ SELECT set_config('log_statement_stats', 'off', false);
     <function>pg_start_backup</> accepts an
     arbitrary user-defined label for the backup.  (Typically this would be
     the name under which the backup dump file will be stored.)  The function
-    writes a backup label file (<filename>backup_label</>) into the
-    database cluster's data directory, performs a checkpoint,
-    and then returns the backup's starting transaction log location as text.
-    The user can ignore this result value, but it is
-    provided in case it is useful.
+    writes a backup label file (<filename>backup_label</>) and, if there
+    are any links in the <filename>pg_tblspc/</> directory, a tablespace map
+    file (<filename>tablespace_map</>) into the database cluster's data
+    directory, performs a checkpoint, and then returns the backup's starting
+    transaction log location as text.  The user can ignore this result value,
+    but it is provided in case it is useful.
 <programlisting>
 postgres=# select pg_start_backup('label_goes_here');
  pg_start_backup
@@ -16610,7 +16611,8 @@ postgres=# select pg_start_backup('label_goes_here');
    </para>
 
    <para>
-    <function>pg_stop_backup</> removes the label file created by
+    <function>pg_stop_backup</> removes the label file and, if it exists,
+    the <filename>tablespace_map</> file created by
     <function>pg_start_backup</>, and creates a backup history file in
     the transaction log archive area.  The history file includes the label given to
     <function>pg_start_backup</>, the starting and ending transaction log locations for
index ac13d3201ce269ca958435a881203bbc2c9519f0..d985204566cab88a209a233d102acf8b4de4b0ad 100644 (file)
@@ -1882,7 +1882,7 @@ The commands accepted in walsender mode are:
   </varlistentry>
 
   <varlistentry>
-    <term>BASE_BACKUP [<literal>LABEL</literal> <replaceable>'label'</replaceable>] [<literal>PROGRESS</literal>] [<literal>FAST</literal>] [<literal>WAL</literal>] [<literal>NOWAIT</literal>] [<literal>MAX_RATE</literal> <replaceable>rate</replaceable>]
+    <term>BASE_BACKUP [<literal>LABEL</literal> <replaceable>'label'</replaceable>] [<literal>PROGRESS</literal>] [<literal>FAST</literal>] [<literal>WAL</literal>] [<literal>NOWAIT</literal>] [<literal>MAX_RATE</literal> <replaceable>rate</replaceable>] [<literal>TABLESPACE_MAP</literal>]
      <indexterm><primary>BASE_BACKUP</primary></indexterm>
     </term>
     <listitem>
@@ -1968,6 +1968,19 @@ The commands accepted in walsender mode are:
          </para>
         </listitem>
        </varlistentry>
+
+       <varlistentry>
+        <term><literal>TABLESPACE_MAP</literal></term>
+        <listitem>
+         <para>
+          Include information about symbolic links present in the directory
+          <filename>pg_tblspc</filename> in a file named
+          <filename>tablespace_map</filename>. The tablespace map file includes
+          each symbolic link name as it exists in the directory
+          <filename>pg_tblspc/</> and the full path of that symbolic link.
+         </para>
+        </listitem>
+       </varlistentry>
       </variablelist>
      </para>
      <para>
index 642fccf325f9fb1ac7da0acbe22c5504abb667f8..07d3a5a1dcfa2ce328ccd0527eb06f2a7997a966 100644 (file)
@@ -587,11 +587,23 @@ PostgreSQL documentation
    tablespaces.
   </para>
 
+  <para>
+   When tar format mode is used, it is the user's responsibility to unpack each
+   tar file before starting postgres. If there are additional tablespaces, the
+   tar files for them need to be unpacked in the correct locations. In this
+   case the symbolic links for those tablespaces will be created by Postgres
+   according to the contents of the <filename>tablespace_map</> file that is
+   included in the <filename>base.tar</> file.
+  </para>
+
   <para>
    <application>pg_basebackup</application> works with servers of the same
    or an older major version, down to 9.1. However, WAL streaming mode (-X
-   stream) only works with server version 9.3 and later.
+   stream) only works with server version 9.3 and later, and tar format mode
+   (--format=tar) of the current version only works with server version 9.5
+   or later.
   </para>
+
  </refsect1>
 
  <refsect1>
index 6f7e3bd96b38e732a5ff1b757b90ec0d963d9498..5f0551a3cbdd4643a5419d3f415ba9dd9858be60 100644 (file)
@@ -42,6 +42,7 @@
 #include "pgstat.h"
 #include "postmaster/bgwriter.h"
 #include "postmaster/startup.h"
+#include "replication/basebackup.h"
 #include "replication/logical.h"
 #include "replication/slot.h"
 #include "replication/origin.h"
@@ -824,6 +825,8 @@ static void xlog_outdesc(StringInfo buf, XLogReaderState *record);
 static void pg_start_backup_callback(int code, Datum arg);
 static bool read_backup_label(XLogRecPtr *checkPointLoc,
                                  bool *backupEndRequired, bool *backupFromStandby);
+static bool read_tablespace_map(List **tablespaces);
+
 static void rm_redo_error_callback(void *arg);
 static int     get_sync_bit(int method);
 
@@ -5917,6 +5920,7 @@ StartupXLOG(void)
        bool            wasShutdown;
        bool            reachedStopPoint = false;
        bool            haveBackupLabel = false;
+       bool            haveTblspcMap = false;
        XLogRecPtr      RecPtr,
                                checkPointLoc,
                                EndOfLog;
@@ -6001,16 +6005,6 @@ StartupXLOG(void)
         */
        ValidateXLOGDirectoryStructure();
 
-       /*
-        * Clear out any old relcache cache files.  This is *necessary* if we do
-        * any WAL replay, since that would probably result in the cache files
-        * being out of sync with database reality.  In theory we could leave them
-        * in place if the database had been cleanly shut down, but it seems
-        * safest to just remove them always and let them be rebuilt during the
-        * first backend startup.
-        */
-       RelationCacheInitFileRemove();
-
        /*
         * Initialize on the assumption we want to recover to the latest timeline
         * that's active according to pg_control.
@@ -6080,6 +6074,8 @@ StartupXLOG(void)
        if (read_backup_label(&checkPointLoc, &backupEndRequired,
                                                  &backupFromStandby))
        {
+               List    *tablespaces = NIL;
+
                /*
                 * Archive recovery was requested, and thanks to the backup label
                 * file, we know how far we need to replay to reach consistency. Enter
@@ -6124,6 +6120,59 @@ StartupXLOG(void)
                                         errhint("If you are not restoring from a backup, try removing the file \"%s/backup_label\".", DataDir)));
                        wasShutdown = false;    /* keep compiler quiet */
                }
+
+               /* read the tablespace_map file if present and create symlinks. */
+               if (read_tablespace_map(&tablespaces))
+               {
+                       ListCell   *lc;
+                       struct stat st;
+
+                       foreach(lc, tablespaces)
+                       {
+                               tablespaceinfo *ti = lfirst(lc);
+                               char    *linkloc;
+
+                               linkloc = psprintf("pg_tblspc/%s", ti->oid);
+
+                               /*
+                                * Remove the existing symlink, if any, and create the symlink
+                                * under PGDATA.  We need to use rmtree instead of rmdir as
+                                * the link location might contain directories or files
+                                * corresponding to the actual path. Some tar utilities do
+                                * things that way while extracting symlinks.
+                                */
+                               if (lstat(linkloc, &st) == 0 && S_ISDIR(st.st_mode))
+                               {
+                                       if (!rmtree(linkloc,true))
+                                               ereport(ERROR,
+                                                               (errcode_for_file_access(),
+                                                                errmsg("could not remove directory \"%s\": %m",
+                                                                               linkloc)));
+                               }
+                               else
+                               {
+                                       if (unlink(linkloc) < 0 && errno != ENOENT)
+                                               ereport(ERROR,
+                                                               (errcode_for_file_access(),
+                                                                errmsg("could not remove symbolic link \"%s\": %m",
+                                                                               linkloc)));
+                               }
+
+                               if (symlink(ti->path, linkloc) < 0)
+                                       ereport(ERROR,
+                                                       (errcode_for_file_access(),
+                                                        errmsg("could not create symbolic link \"%s\": %m",
+                                                                       linkloc)));
+
+                               pfree(ti->oid);
+                               pfree(ti->path);
+                               pfree(ti);
+                       }
+
+                       /* set flag to delete it later */
+                       haveTblspcMap = true;
+               }
+
                /* set flag to delete it later */
                haveBackupLabel = true;
        }
@@ -6197,6 +6246,20 @@ StartupXLOG(void)
                wasShutdown = (record->xl_info == XLOG_CHECKPOINT_SHUTDOWN);
        }
 
+       /*
+        * Clear out any old relcache cache files.  This is *necessary* if we do
+        * any WAL replay, since that would probably result in the cache files
+        * being out of sync with database reality.  In theory we could leave them
+        * in place if the database had been cleanly shut down, but it seems
+        * safest to just remove them always and let them be rebuilt during the
+        * first backend startup.  These files need to be removed from all
+        * directories including pg_tblspc; however, the symlinks are created
+        * only after reading the tablespace_map file in case of archive recovery
+        * from backup, so we need to clear old relcache files here, after
+        * creating the symlinks.
+        */
+       RelationCacheInitFileRemove();
+
        /*
         * If the location of the checkpoint record is not on the expected
         * timeline in the history of the requested timeline, we cannot proceed:
@@ -6466,6 +6529,23 @@ StartupXLOG(void)
                                                                BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
                }
 
+               /*
+                * If there was a tablespace_map file, it's done its job and the
+                * symlinks have been created.  We must get rid of the map file
+                * so that if we crash during recovery, we don't create symlinks
+                * again.  It seems prudent though to just rename the file out of
+                * the way rather than delete it completely.
+                */
+               if (haveTblspcMap)
+               {
+                       unlink(TABLESPACE_MAP_OLD);
+                       if (rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD) != 0)
+                               ereport(FATAL,
+                                               (errcode_for_file_access(),
+                                                errmsg("could not rename file \"%s\" to \"%s\": %m",
+                                                               TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
+               }
+
                /* Check that the GUCs used to generate the WAL allow recovery */
                CheckRequiredParameterValues();
 
@@ -9610,16 +9690,27 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno)
  *
  * There are two kind of backups: exclusive and non-exclusive. An exclusive
  * backup is started with pg_start_backup(), and there can be only one active
- * at a time. The backup label file of an exclusive backup is written to
- * $PGDATA/backup_label, and it is removed by pg_stop_backup().
+ * at a time. The backup and tablespace map files of an exclusive backup are
+ * written to $PGDATA/backup_label and $PGDATA/tablespace_map, and they are
+ * removed by pg_stop_backup().
  *
  * A non-exclusive backup is used for the streaming base backups (see
  * src/backend/replication/basebackup.c). The difference to exclusive backups
- * is that the backup label file is not written to disk. Instead, its would-be
- * contents are returned in *labelfile, and the caller is responsible for
- * including it in the backup archive as 'backup_label'. There can be many
- * non-exclusive backups active at the same time, and they don't conflict
- * with an exclusive backup either.
+ * is that the backup label and tablespace map files are not written to disk.
+ * Instead, their would-be contents are returned in *labelfile and *tblspcmapfile,
+ * and the caller is responsible for including them in the backup archive as
+ * 'backup_label' and 'tablespace_map'. There can be many non-exclusive backups
+ * active at the same time, and they don't conflict with an exclusive backup
+ * either.
+ *
+ * tblspcmapfile is required mainly for tar format on Windows, as native Windows
+ * utilities are not able to create symlinks while extracting files from tar.
+ * However, for consistency, the same is used for all platforms.
+ *
+ * needtblspcmapfile is true in the cases where the backup needs to generate
+ * a tablespace_map file (an exclusive backup, or a non-exclusive backup taken
+ * in tar format); it is used to embed an escape character before each newline
+ * character in a tablespace path.
  *
  * Returns the minimum WAL position that must be present to restore from this
  * backup, and the corresponding timeline ID in *starttli_p.
@@ -9632,7 +9723,9 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno)
  */
 XLogRecPtr
 do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
-                                  char **labelfile)
+                                  char **labelfile, DIR *tblspcdir, List **tablespaces,
+                                  char **tblspcmapfile, bool infotbssize,
+                                  bool needtblspcmapfile)
 {
        bool            exclusive = (labelfile == NULL);
        bool            backup_started_in_recovery = false;
@@ -9646,6 +9739,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
        struct stat stat_buf;
        FILE       *fp;
        StringInfoData labelfbuf;
+       StringInfoData tblspc_mapfbuf;
 
        backup_started_in_recovery = RecoveryInProgress();
 
@@ -9717,6 +9811,9 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
        PG_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
        {
                bool            gotUniqueStartpoint = false;
+               struct dirent *de;
+               tablespaceinfo *ti;
+               int                     datadirpathlen;
 
                /*
                 * Force an XLOG file switch before the checkpoint, to ensure that the
@@ -9836,6 +9933,98 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
                XLByteToSeg(startpoint, _logSegNo);
                XLogFileName(xlogfilename, ThisTimeLineID, _logSegNo);
 
+               /*
+                * Construct tablespace_map file
+                */
+               initStringInfo(&tblspc_mapfbuf);
+
+               datadirpathlen = strlen(DataDir);
+
+               /* Collect information about all tablespaces */
+               while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
+               {
+                       char            fullpath[MAXPGPATH];
+                       char            linkpath[MAXPGPATH];
+                       char       *relpath = NULL;
+                       int                     rllen;
+                       StringInfoData buflinkpath;
+                       char    *s = linkpath;
+
+                       /* Skip special stuff */
+                       if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
+                               continue;
+
+                       snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
+
+#if defined(HAVE_READLINK) || defined(WIN32)
+                       rllen = readlink(fullpath, linkpath, sizeof(linkpath));
+                       if (rllen < 0)
+                       {
+                               ereport(WARNING,
+                                               (errmsg("could not read symbolic link \"%s\": %m",
+                                                               fullpath)));
+                               continue;
+                       }
+                       else if (rllen >= sizeof(linkpath))
+                       {
+                               ereport(WARNING,
+                                               (errmsg("symbolic link \"%s\" target is too long",
+                                                               fullpath)));
+                               continue;
+                       }
+                       linkpath[rllen] = '\0';
+
+                       /*
+                        * Add the escape character '\\' before newline in a string
+                        * to ensure that we can distinguish between the newline in
+                        * the tablespace path and end of line while reading
+                        * tablespace_map file during archive recovery.
+                        */
+                       initStringInfo(&buflinkpath);
+
+                       while (*s)
+                       {
+                               if ((*s == '\n' || *s == '\r') && needtblspcmapfile)
+                                       appendStringInfoChar(&buflinkpath, '\\');
+                               appendStringInfoChar(&buflinkpath, *s++);
+                       }
+
+
+                       /*
+                        * Relpath holds the relative path of the tablespace directory
+                        * when it's located within PGDATA, or NULL if it's located
+                        * elsewhere.
+                        */
+                       if (rllen > datadirpathlen &&
+                               strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
+                               IS_DIR_SEP(linkpath[datadirpathlen]))
+                               relpath = linkpath + datadirpathlen + 1;
+
+                       ti = palloc(sizeof(tablespaceinfo));
+                       ti->oid = pstrdup(de->d_name);
+                       ti->path = pstrdup(buflinkpath.data);
+                       ti->rpath = relpath ? pstrdup(relpath) : NULL;
+                       ti->size = infotbssize ? sendTablespace(fullpath, true) : -1;
+
+                       if(tablespaces)
+                          *tablespaces = lappend(*tablespaces, ti);
+
+                       appendStringInfo(&tblspc_mapfbuf, "%s %s\n", ti->oid, ti->path);
+
+                       pfree(buflinkpath.data);
+#else
+
+                       /*
+                        * If the platform does not have symbolic links, it should not be
+                        * possible to have tablespaces - clearly somebody else created
+                        * them. Warn about it and ignore.
+                        */
+                       ereport(WARNING,
+                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                 errmsg("tablespaces are not supported on this platform")));
+#endif
+               }
+
                /*
                 * Construct backup label file
                 */
@@ -9899,9 +10088,51 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
                                                 errmsg("could not write file \"%s\": %m",
                                                                BACKUP_LABEL_FILE)));
                        pfree(labelfbuf.data);
+
+                       /* Write backup tablespace_map file. */
+                       if (tblspc_mapfbuf.len > 0)
+                       {
+                               if (stat(TABLESPACE_MAP, &stat_buf) != 0)
+                               {
+                                       if (errno != ENOENT)
+                                               ereport(ERROR,
+                                                               (errcode_for_file_access(),
+                                                                errmsg("could not stat file \"%s\": %m",
+                                                                               TABLESPACE_MAP)));
+                               }
+                               else
+                                       ereport(ERROR,
+                                                       (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                                                        errmsg("a backup is already in progress"),
+                                                        errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.",
+                                                                        TABLESPACE_MAP)));
+
+                               fp = AllocateFile(TABLESPACE_MAP, "w");
+
+                               if (!fp)
+                                       ereport(ERROR,
+                                                       (errcode_for_file_access(),
+                                                        errmsg("could not create file \"%s\": %m",
+                                                                       TABLESPACE_MAP)));
+                               if (fwrite(tblspc_mapfbuf.data, tblspc_mapfbuf.len, 1, fp) != 1 ||
+                                       fflush(fp) != 0 ||
+                                       pg_fsync(fileno(fp)) != 0 ||
+                                       ferror(fp) ||
+                                       FreeFile(fp))
+                                       ereport(ERROR,
+                                                       (errcode_for_file_access(),
+                                                        errmsg("could not write file \"%s\": %m",
+                                                                       TABLESPACE_MAP)));
+                       }
+
+                       pfree(tblspc_mapfbuf.data);
                }
                else
+               {
                        *labelfile = labelfbuf.data;
+                       if (tblspc_mapfbuf.len > 0)
+                               *tblspcmapfile = tblspc_mapfbuf.data;
+               }
        }
        PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
 
@@ -10072,6 +10303,12 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
                                        (errcode_for_file_access(),
                                         errmsg("could not remove file \"%s\": %m",
                                                        BACKUP_LABEL_FILE)));
+
+               /*
+                * Remove tablespace_map file if present, it is created
+                * only if there are tablespaces.
+                */
+               unlink(TABLESPACE_MAP);
        }
 
        /*
@@ -10471,6 +10708,86 @@ read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired,
        return true;
 }
 
+/*
+ * read_tablespace_map: check to see if a tablespace_map file is present
+ *
+ * If we see a tablespace_map file during recovery, we assume that we are
+ * recovering from a backup dump file, and we therefore need to create symlinks
+ * as per the information present in tablespace_map file.
+ *
+ * Each logical line of the file has the form "<link name> <link target>".
+ * A newline or carriage return that belongs to the link target is preceded
+ * by a backslash in the file; that backslash is dropped while reading.
+ *
+ * For every line read, one tablespaceinfo entry is appended to *tablespaces
+ * with its oid and path fields filled in and all other fields zeroed.
+ *
+ * Returns TRUE if a tablespace_map file was found (and fills the link
+ * information for all the tablespace links present in file); returns FALSE
+ * if not.  Failure to open the file for any reason other than ENOENT, a read
+ * error, or malformed contents (including a line too long to fit in the
+ * MAXPGPATH-sized line buffer) is reported as FATAL.
+ */
+static bool
+read_tablespace_map(List **tablespaces)
+{
+       tablespaceinfo *ti;
+       FILE       *lfp;
+       char            tbsoid[MAXPGPATH];
+       char            *tbslinkpath;
+       char            str[MAXPGPATH];
+       int                     ch, prev_ch = -1,
+                               i = 0, n;
+
+       /*
+        * See if tablespace_map file is present
+        */
+       lfp = AllocateFile(TABLESPACE_MAP, "r");
+       if (!lfp)
+       {
+               if (errno != ENOENT)
+                       ereport(FATAL,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not read file \"%s\": %m",
+                                                       TABLESPACE_MAP)));
+               return false;                   /* it's not there, all is fine */
+       }
+
+       /*
+        * Read and parse the link name and path lines from tablespace_map file
+        * (this code is pretty crude, but we are not expecting any variability
+        * in the file format).  While taking backup we embed escape character
+        * '\\' before newline in tablespace path, so that during reading of
+        * tablespace_map file, we could distinguish newline in tablespace path
+        * and end of line.  Now while reading tablespace_map file, remove the
+        * escape character that has been added in tablespace path during backup.
+        *
+        * Note that any backslash immediately followed by a newline is taken
+        * as an escape here, so a line whose last character is a literal
+        * backslash is read as continuing onto the next line.
+        */
+       while ((ch = fgetc(lfp)) != EOF)
+       {
+               if ((ch == '\n' || ch == '\r') && prev_ch != '\\')
+               {
+                       /* unescaped line terminator: parse the collected line */
+                       str[i] = '\0';
+                       if (sscanf(str, "%s %n", tbsoid, &n) != 1)
+                               ereport(FATAL,
+                                       (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                                               errmsg("invalid data in file \"%s\"", TABLESPACE_MAP)));
+                       tbslinkpath = str + n;
+                       i = 0;
+
+                       /* zero the entry so fields not set here are well defined */
+                       ti = palloc0(sizeof(tablespaceinfo));
+                       ti->oid = pstrdup(tbsoid);
+                       ti->path = pstrdup(tbslinkpath);
+
+                       *tablespaces = lappend(*tablespaces, ti);
+                       continue;
+               }
+               else if ((ch == '\n' || ch == '\r') && prev_ch == '\\')
+               {
+                       /* escaped newline: overwrite the stored backslash with it */
+                       str[i-1] = ch;
+               }
+               else
+               {
+                       /*
+                        * Refuse lines that would overrun the line buffer; one
+                        * byte must stay free for the terminating NUL.
+                        */
+                       if (i >= (int) sizeof(str) - 1)
+                               ereport(FATAL,
+                                       (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                                               errmsg("invalid data in file \"%s\"", TABLESPACE_MAP)));
+                       str[i++] = ch;
+               }
+               prev_ch = ch;
+       }
+
+       if (ferror(lfp) || FreeFile(lfp))
+               ereport(FATAL,
+                               (errcode_for_file_access(),
+                                errmsg("could not read file \"%s\": %m",
+                                               TABLESPACE_MAP)));
+
+       return true;
+}
+
+
 /*
  * Error context callback for errors occurring during rm_redo().
  */
@@ -10502,11 +10819,16 @@ BackupInProgress(void)
 }
 
 /*
- * CancelBackup: rename the "backup_label" file to cancel backup mode
+ * CancelBackup: rename the "backup_label" and "tablespace_map"
+ *               files to cancel backup mode
  *
  * If the "backup_label" file exists, it will be renamed to "backup_label.old".
- * Note that this will render an online backup in progress useless.
- * To correctly finish an online backup, pg_stop_backup must be called.
+ * Similarly, if the "tablespace_map" file exists, it will be renamed to
+ * "tablespace_map.old".
+ *
+ * Note that this will render an online backup in progress
+ * useless. To correctly finish an online backup, pg_stop_backup must be
+ * called.
  */
 void
 CancelBackup(void)
@@ -10535,6 +10857,29 @@ CancelBackup(void)
                                 errdetail("Could not rename \"%s\" to \"%s\": %m.",
                                                   BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
        }
+
+       /* if the tablespace_map file is not there, return */
+       if (stat(TABLESPACE_MAP, &stat_buf) < 0)
+               return;
+
+       /* remove leftover file from previously canceled backup if it exists */
+       unlink(TABLESPACE_MAP_OLD);
+
+       if (rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD) == 0)
+       {
+               ereport(LOG,
+                               (errmsg("online backup mode canceled"),
+                                errdetail("\"%s\" was renamed to \"%s\".",
+                                                  TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
+       }
+       else
+       {
+               ereport(WARNING,
+                               (errcode_for_file_access(),
+                                errmsg("online backup mode was not canceled"),
+                                errdetail("Could not rename \"%s\" to \"%s\": %m.",
+                                                  TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
+       }
 }
 
 /*
index 2179bf719e1ff1bf14ccdf42f0382abca29a9e49..329bb8ca25d55d4577c08be6f03d635ed23885d9 100644 (file)
@@ -51,6 +51,7 @@ pg_start_backup(PG_FUNCTION_ARGS)
        bool            fast = PG_GETARG_BOOL(1);
        char       *backupidstr;
        XLogRecPtr      startpoint;
+       DIR                *dir;
 
        backupidstr = text_to_cstring(backupid);
 
@@ -59,7 +60,16 @@ pg_start_backup(PG_FUNCTION_ARGS)
                                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                   errmsg("must be superuser or replication role to run a backup")));
 
-       startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL);
+       /* Make sure we can open the directory with tablespaces in it */
+       dir = AllocateDir("pg_tblspc");
+       if (!dir)
+               ereport(ERROR,
+                               (errmsg("could not open directory \"%s\": %m", "pg_tblspc")));
+
+       startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL,
+                                                                       dir, NULL, NULL, false, true);
+
+       FreeDir(dir);
 
        PG_RETURN_LSN(startpoint);
 }
index de103c6f5b7300283915e2ece36867e51465a7cf..b341ff64d9e276101d06d25a61d1270fab916969 100644 (file)
@@ -46,11 +46,12 @@ typedef struct
        bool            nowait;
        bool            includewal;
        uint32          maxrate;
+       bool            sendtblspcmapfile;
 } basebackup_options;
 
 
-static int64 sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces);
-static int64 sendTablespace(char *path, bool sizeonly);
+static int64 sendDir(char *path, int basepathlen, bool sizeonly,
+                                        List *tablespaces, bool sendtblspclinks);
 static bool sendFile(char *readfilename, char *tarfilename,
                 struct stat * statbuf, bool missing_ok);
 static void sendFileWithContent(const char *filename, const char *content);
@@ -93,15 +94,6 @@ static int64 elapsed_min_unit;
 /* The last check of the transfer rate. */
 static int64 throttled_last;
 
-typedef struct
-{
-       char       *oid;
-       char       *path;
-       char       *rpath;                      /* relative path within PGDATA, or NULL */
-       int64           size;
-} tablespaceinfo;
-
-
 /*
  * Called when ERROR or FATAL happens in perform_base_backup() after
  * we have started the backup - make sure we end it!
@@ -126,14 +118,18 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
        XLogRecPtr      endptr;
        TimeLineID      endtli;
        char       *labelfile;
+       char       *tblspc_map_file = NULL;
        int                     datadirpathlen;
+       List       *tablespaces = NIL;
 
        datadirpathlen = strlen(DataDir);
 
        backup_started_in_recovery = RecoveryInProgress();
 
        startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli,
-                                                                 &labelfile);
+                                                                 &labelfile, tblspcdir, &tablespaces,
+                                                                 &tblspc_map_file,
+                                                                 opt->progress, opt->sendtblspcmapfile);
        /*
         * Once do_pg_start_backup has been called, ensure that any failure causes
         * us to abort the backup so we don't "leak" a backup counter. For this reason,
@@ -143,9 +139,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
 
        PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
        {
-               List       *tablespaces = NIL;
                ListCell   *lc;
-               struct dirent *de;
                tablespaceinfo *ti;
 
                SendXlogRecPtrResult(startptr, starttli);
@@ -162,70 +156,9 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
                else
                        statrelpath = pgstat_stat_directory;
 
-               /* Collect information about all tablespaces */
-               while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
-               {
-                       char            fullpath[MAXPGPATH];
-                       char            linkpath[MAXPGPATH];
-                       char       *relpath = NULL;
-                       int                     rllen;
-
-                       /* Skip special stuff */
-                       if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
-                               continue;
-
-                       snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
-
-#if defined(HAVE_READLINK) || defined(WIN32)
-                       rllen = readlink(fullpath, linkpath, sizeof(linkpath));
-                       if (rllen < 0)
-                       {
-                               ereport(WARNING,
-                                               (errmsg("could not read symbolic link \"%s\": %m",
-                                                               fullpath)));
-                               continue;
-                       }
-                       else if (rllen >= sizeof(linkpath))
-                       {
-                               ereport(WARNING,
-                                               (errmsg("symbolic link \"%s\" target is too long",
-                                                               fullpath)));
-                               continue;
-                       }
-                       linkpath[rllen] = '\0';
-
-                       /*
-                        * Relpath holds the relative path of the tablespace directory
-                        * when it's located within PGDATA, or NULL if it's located
-                        * elsewhere.
-                        */
-                       if (rllen > datadirpathlen &&
-                               strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
-                               IS_DIR_SEP(linkpath[datadirpathlen]))
-                               relpath = linkpath + datadirpathlen + 1;
-
-                       ti = palloc(sizeof(tablespaceinfo));
-                       ti->oid = pstrdup(de->d_name);
-                       ti->path = pstrdup(linkpath);
-                       ti->rpath = relpath ? pstrdup(relpath) : NULL;
-                       ti->size = opt->progress ? sendTablespace(fullpath, true) : -1;
-                       tablespaces = lappend(tablespaces, ti);
-#else
-
-                       /*
-                        * If the platform does not have symbolic links, it should not be
-                        * possible to have tablespaces - clearly somebody else created
-                        * them. Warn about it and ignore.
-                        */
-                       ereport(WARNING,
-                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                                 errmsg("tablespaces are not supported on this platform")));
-#endif
-               }
-
                /* Add a node for the base directory at the end */
                ti = palloc0(sizeof(tablespaceinfo));
-               ti->size = opt->progress ? sendDir(".", 1, true, tablespaces) : -1;
+               ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1;
                tablespaces = lappend(tablespaces, ti);
 
                /* Send tablespace header */
@@ -274,8 +207,17 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
                                /* In the main tar, include the backup_label first... */
                                sendFileWithContent(BACKUP_LABEL_FILE, labelfile);
 
-                               /* ... then the bulk of the files ... */
-                               sendDir(".", 1, false, tablespaces);
+                               /*
+                                * Send tablespace_map file if required and then the bulk of
+                                * the files.
+                                */
+                               if (tblspc_map_file && opt->sendtblspcmapfile)
+                               {
+                                       sendFileWithContent(TABLESPACE_MAP, tblspc_map_file);
+                                       sendDir(".", 1, false, tablespaces, false);
+                               }
+                               else
+                                       sendDir(".", 1, false, tablespaces, true);
 
                                /* ... and pg_control after everything else. */
                                if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
@@ -567,6 +509,7 @@ parse_basebackup_options(List *options, basebackup_options *opt)
        bool            o_nowait = false;
        bool            o_wal = false;
        bool            o_maxrate = false;
+       bool            o_tablespace_map = false;
 
        MemSet(opt, 0, sizeof(*opt));
        foreach(lopt, options)
@@ -637,6 +580,15 @@ parse_basebackup_options(List *options, basebackup_options *opt)
                        opt->maxrate = (uint32) maxrate;
                        o_maxrate = true;
                }
+               else if (strcmp(defel->defname, "tablespace_map") == 0)
+               {
+                       if (o_tablespace_map)
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_SYNTAX_ERROR),
+                                                errmsg("duplicate option \"%s\"", defel->defname)));
+                       opt->sendtblspcmapfile = true;
+                       o_tablespace_map = true;
+               }
                else
                        elog(ERROR, "option \"%s\" not recognized",
                                 defel->defname);
@@ -865,7 +817,7 @@ sendFileWithContent(const char *filename, const char *content)
  *
  * Only used to send auxiliary tablespaces, not PGDATA.
  */
-static int64
+int64
 sendTablespace(char *path, bool sizeonly)
 {
        int64           size;
@@ -899,7 +851,7 @@ sendTablespace(char *path, bool sizeonly)
        size = 512;                                     /* Size of the header just added */
 
        /* Send all the files in the tablespace version directory */
-       size += sendDir(pathbuf, strlen(path), sizeonly, NIL);
+       size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true);
 
        return size;
 }
@@ -911,9 +863,14 @@ sendTablespace(char *path, bool sizeonly)
  *
  * Omit any directory in the tablespaces list, to avoid backing up
  * tablespaces twice when they were created inside PGDATA.
+ *
+ * If sendtblspclinks is true, we need to include symlink
+ * information in the tar file. If not, we can skip that
+ * as it will be sent separately in the tablespace_map file.
  */
 static int64
-sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
+sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
+               bool sendtblspclinks)
 {
        DIR                *dir;
        struct dirent *de;
@@ -941,13 +898,17 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
                        continue;
 
                /*
-                * If there's a backup_label file, it belongs to a backup started by
-                * the user with pg_start_backup(). It is *not* correct for this
-                * backup, our backup_label is injected into the tar separately.
+                * If there's a backup_label or tablespace_map file, it belongs to a
+                * backup started by the user with pg_start_backup(). It is *not*
+                * correct for this backup; our backup_label/tablespace_map is injected
+                * into the tar separately.
                 */
                if (strcmp(de->d_name, BACKUP_LABEL_FILE) == 0)
                        continue;
 
+               if (strcmp(de->d_name, TABLESPACE_MAP) == 0)
+                       continue;
+
                /*
                 * Check if the postmaster has signaled us to exit, and abort with an
                 * error in that case. The error handler further up will call
@@ -1120,8 +1081,15 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
                                        break;
                                }
                        }
+
+                       /*
+                        * Skip the contents of pg_tblspc when sendtblspclinks is false.
+                        */
+                       if (strcmp(pathbuf, "./pg_tblspc") == 0 && !sendtblspclinks)
+                               skip_this_dir = true;
+
                        if (!skip_this_dir)
-                               size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces);
+                               size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, sendtblspclinks);
                }
                else if (S_ISREG(statbuf.st_mode))
                {
index 7d6d154f9c24ff34094ab3eff289dd2c1a639a3a..e9177ca0db6d2596ec97a543764a919ed78c9593 100644 (file)
@@ -71,13 +71,16 @@ Node *replication_parse_result;
 %token K_NOWAIT
 %token K_MAX_RATE
 %token K_WAL
+%token K_TABLESPACE_MAP
 %token K_TIMELINE
 %token K_PHYSICAL
 %token K_LOGICAL
 %token K_SLOT
 
 %type <node>   command
-%type <node>   base_backup start_replication start_logical_replication create_replication_slot drop_replication_slot identify_system timeline_history
+%type <node>   base_backup start_replication start_logical_replication
+                               create_replication_slot drop_replication_slot identify_system
+                               timeline_history
 %type <list>   base_backup_opt_list
 %type <defelt> base_backup_opt
 %type <uintval>        opt_timeline
@@ -119,12 +122,14 @@ identify_system:
                        ;
 
 /*
- * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d]
+ * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT]
+ * [MAX_RATE %d] [TABLESPACE_MAP]
  */
 base_backup:
                        K_BASE_BACKUP base_backup_opt_list
                                {
-                                       BaseBackupCmd *cmd = (BaseBackupCmd *) makeNode(BaseBackupCmd);
+                                       BaseBackupCmd *cmd =
+                                               (BaseBackupCmd *) makeNode(BaseBackupCmd);
                                        cmd->options = $2;
                                        $$ = (Node *) cmd;
                                }
@@ -168,6 +173,11 @@ base_backup_opt:
                                  $$ = makeDefElem("max_rate",
                                                                   (Node *)makeInteger($2));
                                }
+                       | K_TABLESPACE_MAP
+                               {
+                                 $$ = makeDefElem("tablespace_map",
+                                                                  (Node *)makeInteger(TRUE));
+                               }
                        ;
 
 create_replication_slot:
index f8acb66552c88dec06d1f7b412d16245b61c9112..056cc1412937bb0bc8011464d75b08498155c225 100644 (file)
@@ -88,6 +88,7 @@ NOWAIT                        { return K_NOWAIT; }
 PROGRESS                       { return K_PROGRESS; }
 MAX_RATE               { return K_MAX_RATE; }
 WAL                    { return K_WAL; }
+TABLESPACE_MAP                 { return K_TABLESPACE_MAP; }
 TIMELINE                       { return K_TIMELINE; }
 START_REPLICATION      { return K_START_REPLICATION; }
 CREATE_REPLICATION_SLOT                { return K_CREATE_REPLICATION_SLOT; }
index 83bf2f5519f1df1bb7c9402fdbc480e33ce38781..2d0ea7bad8671d24dc50b7d8049a9e6166ebe5bb 100644 (file)
@@ -1652,13 +1652,14 @@ BaseBackup(void)
                maxrate_clause = psprintf("MAX_RATE %u", maxrate);
 
        basebkp =
-               psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s",
+               psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s",
                                 escaped_label,
                                 showprogress ? "PROGRESS" : "",
                                 includewal && !streamwal ? "WAL" : "",
                                 fastcheckpoint ? "FAST" : "",
                                 includewal ? "NOWAIT" : "",
-                                maxrate_clause ? maxrate_clause : "");
+                                maxrate_clause ? maxrate_clause : "",
+                                format == 't'  ? "TABLESPACE_MAP": "");
 
        if (PQsendQuery(conn, basebkp) == 0)
        {
index f08b6767ed78319e5084f46aa77d2893ee228770..961e05062219db528d38256d22e952f36b9a4c87 100644 (file)
@@ -17,6 +17,8 @@
 #include "access/xlogreader.h"
 #include "datatype/timestamp.h"
 #include "lib/stringinfo.h"
+#include "nodes/pg_list.h"
+#include "storage/fd.h"
 
 
 /* Sync methods */
@@ -258,7 +260,9 @@ extern void assign_checkpoint_completion_target(double newval, void *extra);
  * Starting/stopping a base backup
  */
 extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast,
-                                  TimeLineID *starttli_p, char **labelfile);
+                                  TimeLineID *starttli_p, char **labelfile, DIR *tblspcdir,
+                                  List **tablespaces, char **tblspcmapfile, bool infotbssize,
+                                  bool needtblspcmapfile);
 extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive,
                                  TimeLineID *stoptli_p);
 extern void do_pg_abort_backup(void);
@@ -267,4 +271,7 @@ extern void do_pg_abort_backup(void);
 #define BACKUP_LABEL_FILE              "backup_label"
 #define BACKUP_LABEL_OLD               "backup_label.old"
 
+#define TABLESPACE_MAP                 "tablespace_map"
+#define TABLESPACE_MAP_OLD             "tablespace_map.old"
+
 #endif   /* XLOG_H */
index 64f2bd510e78ee0427785a009c3365037e238510..7d3d09e9d63fc3612228bce1dd26b597c83a82e9 100644 (file)
 #define MAX_RATE_UPPER 1048576
 
 
+/*
+ * One entry in the list of tablespaces handled by a base backup.
+ *
+ * NOTE(review): the entry appended for the base directory appears to be
+ * zero-filled with only "size" set, so oid and path may be NULL there --
+ * confirm against perform_base_backup().
+ */
+typedef struct
+{
+       char       *oid;                        /* name of the tablespace's entry under pg_tblspc (OID as text) */
+       char       *path;                       /* location the pg_tblspc entry links to */
+       char       *rpath;                      /* relative path within PGDATA, or NULL */
+       int64           size;                   /* size computed for progress reporting, or -1 if not requested */
+} tablespaceinfo;
+
 extern void SendBaseBackup(BaseBackupCmd *cmd);
 
+extern int64 sendTablespace(char *path, bool sizeonly);
+
 #endif   /* _BASEBACKUP_H */