<listitem>
<para>
Output a <command>tar</command>-format archive suitable for input
- into <application>pg_restore</application>. The tar-format is
- compatible with the directory-format; extracting a tar-format
+ into <application>pg_restore</application>. The tar format is
+ compatible with the directory format: extracting a tar-format
archive produces a valid directory-format archive.
- However, the tar-format does not support compression and has a
- limit of 8 GB on the size of individual tables. Also, the relative
- order of table data items cannot be changed during restore.
+ However, the tar format does not support compression. Also, when
+ using tar format the relative order of table data items cannot be
+ changed during restore.
</para>
</listitem>
</varlistentry>
catalogs might be left in the wrong state.
</para>
- <para>
- Members of tar archives are limited to a size less than 8 GB.
- (This is an inherent limitation of the tar file format.) Therefore
- this format cannot be used if the textual representation of any one table
- exceeds that size. The total size of a tar archive and any of the
- other output formats is not limited, except possibly by the
- operating system.
- </para>
-
<para>
The dump file produced by <application>pg_dump</application>
does not contain the statistics used by the optimizer to make
#include "utils/elog.h"
#include "utils/memutils.h"
#include "utils/ps_status.h"
+#include "pgtar.h"
typedef struct
{
*/
-/*
- * Utility routine to print possibly larger than 32 bit integers in a
- * portable fashion. Filled with zeros.
- */
-static void
-print_val(char *s, uint64 val, unsigned int base, size_t len)
-{
- int i;
-
- for (i = len; i > 0; i--)
- {
- int digit = val % base;
-
- s[i - 1] = '0' + digit;
- val = val / base;
- }
-}
-
-/*
- * Maximum file size for a tar member: The limit inherent in the
- * format is 2^33-1 bytes (nearly 8 GB). But we don't want to exceed
- * what we can represent in pgoff_t.
- */
-#define MAX_TAR_MEMBER_FILELEN (((int64) 1 << Min(33, sizeof(pgoff_t)*8 - 1)) - 1)
-
-static int
-_tarChecksum(char *header)
-{
- int i,
- sum;
-
- /*
- * Per POSIX, the checksum is the simple sum of all bytes in the header,
- * treating the bytes as unsigned, and treating the checksum field (at
- * offset 148) as though it contained 8 spaces.
- */
- sum = 8 * ' '; /* presumed value for checksum field */
- for (i = 0; i < 512; i++)
- if (i < 148 || i >= 156)
- sum += 0xFF & header[i];
- return sum;
-}
-
/*
* Given the member, write the TAR header & send the file.
*
errmsg("could not open file \"%s\": %m", readfilename)));
}
- /*
- * Some compilers will throw a warning knowing this test can never be true
- * because pgoff_t can't exceed the compared maximum on their platform.
- */
- if (statbuf->st_size > MAX_TAR_MEMBER_FILELEN)
- ereport(ERROR,
- (errmsg("archive member \"%s\" too large for tar format",
- tarfilename)));
-
_tarWriteHeader(tarfilename, NULL, statbuf);
while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0)
{
char h[512];
- /*
- * Note: most of the fields in a tar header are not supposed to be
- * null-terminated. We use sprintf, which will write a null after the
- * required bytes; that null goes into the first byte of the next field.
- * This is okay as long as we fill the fields in order.
- */
- memset(h, 0, sizeof(h));
-
- /* Name 100 */
- strlcpy(&h[0], filename, 100);
- if (linktarget != NULL || S_ISDIR(statbuf->st_mode))
- {
- /*
- * We only support symbolic links to directories, and this is
- * indicated in the tar format by adding a slash at the end of the
- * name, the same as for regular directories.
- */
- int flen = strlen(filename);
-
- flen = Min(flen, 99);
- h[flen] = '/';
- h[flen + 1] = '\0';
- }
-
- /* Mode 8 */
- sprintf(&h[100], "%07o ", (int) statbuf->st_mode);
-
- /* User ID 8 */
- sprintf(&h[108], "%07o ", statbuf->st_uid);
-
- /* Group 8 */
- sprintf(&h[116], "%07o ", statbuf->st_gid);
-
- /* File size 12 - 11 digits, 1 space; use print_val for 64 bit support */
- if (linktarget != NULL || S_ISDIR(statbuf->st_mode))
- /* Symbolic link or directory has size zero */
- print_val(&h[124], 0, 8, 11);
- else
- print_val(&h[124], statbuf->st_size, 8, 11);
- sprintf(&h[135], " ");
-
- /* Mod Time 12 */
- sprintf(&h[136], "%011o ", (int) statbuf->st_mtime);
-
- /* Checksum 8 cannot be calculated until we've filled all other fields */
-
- if (linktarget != NULL)
- {
- /* Type - Symbolic link */
- sprintf(&h[156], "2");
- /* Link Name 100 */
- strlcpy(&h[157], linktarget, 100);
- }
- else if (S_ISDIR(statbuf->st_mode))
- /* Type - directory */
- sprintf(&h[156], "5");
- else
- /* Type - regular file */
- sprintf(&h[156], "0");
-
- /* Magic 6 */
- sprintf(&h[257], "ustar");
-
- /* Version 2 */
- sprintf(&h[263], "00");
-
- /* User 32 */
- /* XXX: Do we need to care about setting correct username? */
- strlcpy(&h[265], "postgres", 32);
-
- /* Group 32 */
- /* XXX: Do we need to care about setting correct group name? */
- strlcpy(&h[297], "postgres", 32);
-
- /* Major Dev 8 */
- sprintf(&h[329], "%07o ", 0);
-
- /* Minor Dev 8 */
- sprintf(&h[337], "%07o ", 0);
-
- /* Prefix 155 - not used, leave as nulls */
-
- /*
- * We mustn't overwrite the next field while inserting the checksum.
- * Fortunately, the checksum can't exceed 6 octal digits, so we just write
- * 6 digits, a space, and a null, which is legal per POSIX.
- */
- sprintf(&h[148], "%06o ", _tarChecksum(h));
+ tarCreateHeader(h, filename, linktarget, statbuf->st_size,
+ statbuf->st_mode, statbuf->st_uid, statbuf->st_gid,
+ statbuf->st_mtime,
+ false /* write real POSIX header */);
- /* Now send the completed header. */
pq_putmessage('d', h, 512);
}
#include "postgres_fe.h"
#include "libpq-fe.h"
+#include "pgtar.h"
#include <unistd.h>
#include <dirent.h>
{
char current_path[MAXPGPATH];
char filename[MAXPGPATH];
- int current_len_left;
+ pgoff_t current_len_left = 0;
int current_padding = 0;
char *copybuf = NULL;
FILE *file = NULL;
}
totaldone += 512;
- if (sscanf(copybuf + 124, "%11o", &current_len_left) != 1)
- {
- fprintf(stderr, _("%s: could not parse file size\n"),
- progname);
- disconnect_and_exit(1);
- }
+ current_len_left = read_tar_number(&copybuf[124], 12);
/* Set permissions on the file */
- if (sscanf(&copybuf[100], "%07o ", &filemode) != 1)
- {
- fprintf(stderr, _("%s: could not parse file mode\n"),
- progname);
- disconnect_and_exit(1);
- }
+ filemode = read_tar_number(&copybuf[100], 8);
/*
* All files are padded up to 512 bytes
#include "pg_backup.h"
#include "pg_backup_archiver.h"
#include "pg_backup_tar.h"
+#include "pgtar.h"
#include <sys/stat.h>
#include <ctype.h>
ArchiveHandle *AH;
} TAR_MEMBER;
-/*
- * Maximum file size for a tar member: The limit inherent in the
- * format is 2^33-1 bytes (nearly 8 GB). But we don't want to exceed
- * what we can represent in pgoff_t.
- */
-#define MAX_TAR_MEMBER_FILELEN (((int64) 1 << Min(33, sizeof(pgoff_t)*8 - 1)) - 1)
-
typedef struct
{
int hasSeek;
static int tarPrintf(ArchiveHandle *AH, TAR_MEMBER *th, const char *fmt,...);
static void _tarAddFile(ArchiveHandle *AH, TAR_MEMBER *th);
-static int _tarChecksum(char *th);
static TAR_MEMBER *_tarPositionTo(ArchiveHandle *AH, const char *filename);
static size_t tarRead(void *buf, size_t len, TAR_MEMBER *th);
static size_t tarWrite(const void *buf, size_t len, TAR_MEMBER *th);
return cnt;
}
-static int
-_tarChecksum(char *header)
-{
- int i,
- sum;
-
- /*
- * Per POSIX, the checksum is the simple sum of all bytes in the header,
- * treating the bytes as unsigned, and treating the checksum field (at
- * offset 148) as though it contained 8 spaces.
- */
- sum = 8 * ' '; /* presumed value for checksum field */
- for (i = 0; i < 512; i++)
- if (i < 148 || i >= 156)
- sum += 0xFF & header[i];
- return sum;
-}
-
bool
isValidTarHeader(char *header)
{
int sum;
- int chk = _tarChecksum(header);
+ int chk = tarChecksum(header);
- sscanf(&header[148], "%8o", &sum);
+ sum = read_tar_number(&header[148], 8);
if (sum != chk)
return false;
th->fileLen = ftello(tmp);
fseeko(tmp, 0, SEEK_SET);
- /*
- * Some compilers will throw a warning knowing this test can never be true
- * because pgoff_t can't exceed the compared maximum on their platform.
- */
- if (th->fileLen > MAX_TAR_MEMBER_FILELEN)
- die_horribly(AH, modulename, "archive member too large for tar format\n");
-
_tarWriteHeader(th);
while ((cnt = fread(buf, 1, sizeof(buf), tmp)) > 0)
{
lclContext *ctx = (lclContext *) AH->formatData;
char h[512];
- char tag[100];
+ char tag[100 + 1];
int sum,
chk;
- size_t len;
- unsigned long ullen;
+ pgoff_t len;
pgoff_t hPos;
bool gotBlock = false;
(unsigned long) len);
/* Calc checksum */
- chk = _tarChecksum(h);
- sscanf(&h[148], "%8o", &sum);
+ chk = tarChecksum(h);
+ sum = read_tar_number(&h[148], 8);
/*
* If the checksum failed, see if it is a null block. If so, silently
}
}
- sscanf(&h[0], "%99s", tag);
- sscanf(&h[124], "%12lo", &ullen);
- len = (size_t) ullen;
+ /* Name field is 100 bytes, might not be null-terminated */
+ strlcpy(tag, &h[0], 100 + 1);
+
+ len = read_tar_number(&h[124], 12);
{
- char buf[100];
+ char posbuf[32];
+ char lenbuf[32];
- snprintf(buf, sizeof(buf), INT64_FORMAT, (int64) hPos);
- ahlog(AH, 3, "TOC Entry %s at %s (length %lu, checksum %d)\n",
- tag, buf, (unsigned long) len, sum);
+ snprintf(posbuf, sizeof(posbuf), UINT64_FORMAT, (uint64) hPos);
+ snprintf(lenbuf, sizeof(lenbuf), UINT64_FORMAT, (uint64) len);
+ ahlog(AH, 3, "TOC Entry %s at %s (length %s, checksum %d)\n",
+ tag, posbuf, lenbuf, sum);
}
if (chk != sum)
{
- char buf[100];
+ char posbuf[32];
- snprintf(buf, sizeof(buf), INT64_FORMAT, (int64) ftello(ctx->tarFH));
+ snprintf(posbuf, sizeof(posbuf), UINT64_FORMAT,
+ (uint64) ftello(ctx->tarFH));
die_horribly(AH, modulename,
"corrupt tar header found in %s "
"(expected %d, computed %d) file position %s\n",
- tag, sum, chk, buf);
+ tag, sum, chk, posbuf);
}
th->targetFile = strdup(tag);
}
-/*
- * Utility routine to print possibly larger than 32 bit integers in a
- * portable fashion. Filled with zeros.
- */
-static void
-print_val(char *s, uint64 val, unsigned int base, size_t len)
-{
- int i;
-
- for (i = len; i > 0; i--)
- {
- int digit = val % base;
-
- s[i - 1] = '0' + digit;
- val = val / base;
- }
-}
-
-
static void
_tarWriteHeader(TAR_MEMBER *th)
{
char h[512];
- int lastSum = 0;
- int sum;
-
- memset(h, 0, sizeof(h));
-
- /* Name 100 */
- sprintf(&h[0], "%.99s", th->targetFile);
-
- /* Mode 8 */
- sprintf(&h[100], "100600 ");
-
- /* User ID 8 */
- sprintf(&h[108], "004000 ");
-
- /* Group 8 */
- sprintf(&h[116], "002000 ");
-
- /* File size 12 - 11 digits, 1 space, no NUL */
- print_val(&h[124], th->fileLen, 8, 11);
- sprintf(&h[135], " ");
-
- /* Mod Time 12 */
- sprintf(&h[136], "%011o ", (int) time(NULL));
-
- /* Checksum 8 */
- sprintf(&h[148], "%06o ", lastSum);
- /* Type - regular file */
- sprintf(&h[156], "0");
-
- /* Link tag 100 (NULL) */
-
- /* Magic 6 + Version 2 */
- sprintf(&h[257], "ustar00");
-
-#if 0
- /* User 32 */
- sprintf(&h[265], "%.31s", ""); /* How do I get username reliably? Do
- * I need to? */
-
- /* Group 32 */
- sprintf(&h[297], "%.31s", ""); /* How do I get group reliably? Do I
- * need to? */
-
- /* Maj Dev 8 */
- sprintf(&h[329], "%6o ", 0);
-
- /* Min Dev 8 */
- sprintf(&h[337], "%6o ", 0);
-#endif
-
- while ((sum = _tarChecksum(h)) != lastSum)
- {
- sprintf(&h[148], "%06o ", sum);
- lastSum = sum;
- }
+ tarCreateHeader(h, th->targetFile, NULL, th->fileLen,
+ 0600, 04000, 02000, time(NULL),
+ true /* backwards compatible format */);
+ /* Now write the completed header. */
if (fwrite(h, 1, 512, th->tarFH) != 512)
die_horribly(th->AH, modulename, "could not write to output file: %s\n", strerror(errno));
}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * pgtar.h
+ * Functions for manipulating tarfile data structures (src/port/tar.c)
+ *
+ * tarCreateHeader fills a caller-supplied 512-byte buffer with a tar
+ * member header built from the given name, optional link target, size,
+ * mode, owner ids and modification time; the final flag selects the
+ * pre-9.3 pg_dump form of the magic/version fields.
+ *
+ * read_tar_number decodes a numeric header field of the given length,
+ * accepting either octal digits or the \200-prefixed base-256 form.
+ *
+ * tarChecksum returns the checksum of a 512-byte header, computed as if
+ * the checksum field held eight spaces.
+ *
+ * NOTE(review): this header names pgoff_t, uint64 and bool without
+ * including anything itself; src/port/tar.c includes c.h first.
+ * Presumably every includer must do likewise - confirm.
+ *
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/pgtar.h
+ *
+ *-------------------------------------------------------------------------
+ */
+extern void tarCreateHeader(char *h, const char *filename, const char *linktarget,
+ pgoff_t size, mode_t mode, uid_t uid, gid_t gid, time_t mtime,
+ bool bogus);
+extern uint64 read_tar_number(const char *s, int len);
+extern int tarChecksum(char *header);
OBJS = $(LIBOBJS) chklocale.o dirmod.o exec.o inet_net_ntop.o noblock.o \
path.o pgcheckdir.o pgmkdirp.o pgsleep.o pgstrcasecmp.o \
- qsort.o qsort_arg.o sprompt.o thread.o
+ qsort.o qsort_arg.o sprompt.o tar.o thread.o
# foo_srv.o and foo.o are both built from foo.c, but only foo.o has -DFRONTEND
OBJS_SRV = $(OBJS:%.o=%_srv.o)
--- /dev/null
+#include "c.h"
+#include "pgtar.h"
+#include <sys/stat.h>
+
+/*
+ * Print a numeric field in a tar header. The field starts at *s and is of
+ * length len; val is the value to be written.
+ *
+ * Per POSIX, the way to write a number is in octal with leading zeroes and
+ * one trailing space (or NUL, but we use space) at the end of the specified
+ * field width.
+ *
+ * However, the given value may not fit in the available space in octal form.
+ * If that's true, we use the GNU extension of writing \200 followed by the
+ * number in base-256 form (ie, stored in binary MSB-first). (Note: here we
+ * support only non-negative numbers, so we don't worry about the GNU rules
+ * for handling negative numbers.)
+ *
+ * The octal form is chosen when val < 2^(3 * (len - 1)), that is, when it
+ * fits in the len - 1 digit positions left over after the trailing space.
+ * In the base-256 form only the low-order len - 1 bytes of val are stored.
+ * Exactly len bytes are written at s in either case; no terminating NUL is
+ * added after them.
+ */
+static void
+print_tar_number(char *s, int len, uint64 val)
+{
+ if (val < (((uint64) 1) << ((len - 1) * 3)))
+ {
+ /*
+ * Use octal with trailing space. The space goes in the last byte of
+ * the field; the remaining bytes are then filled from right to left
+ * with octal digits, least significant first, which zero-pads on the
+ * left.
+ */
+ s[--len] = ' ';
+ while (len)
+ {
+ s[--len] = (val & 7) + '0';
+ val >>= 3;
+ }
+ }
+ else
+ {
+ /*
+ * Use base-256 with leading \200. The marker occupies s[0]; bytes
+ * s[len - 1] down to s[1] receive val from its least significant
+ * byte upward, giving MSB-first order in the field.
+ */
+ s[0] = '\200';
+ while (len > 1)
+ {
+ s[--len] = (val & 255);
+ val >>= 8;
+ }
+ }
+}
+
+
+/*
+ * Read a numeric field in a tar header. The field starts at *s and is of
+ * length len.
+ *
+ * The POSIX-approved format for a number is octal, ending with a space or
+ * NUL. However, for values that don't fit, we recognize the GNU extension
+ * of \200 followed by the number in base-256 form (ie, stored in binary
+ * MSB-first). (Note: here we support only non-negative numbers, so we don't
+ * worry about the GNU rules for handling negative numbers.)
+ *
+ * Returns the decoded value. No error is reported for a malformed field:
+ * octal parsing simply stops at the first byte that is not an octal digit
+ * (or after len bytes), so a field that starts with neither a digit nor
+ * \200 (for example one padded with leading spaces) yields 0. In the
+ * base-256 form the len - 1 bytes after the marker are accumulated into a
+ * uint64, and any bits shifted beyond 64 are discarded.
+ */
+uint64
+read_tar_number(const char *s, int len)
+{
+ uint64 result = 0;
+
+ if (*s == '\200')
+ {
+ /*
+ * base-256: the pre-increment steps past the \200 marker first, then
+ * each of the remaining len - 1 bytes is folded in, most significant
+ * byte first.
+ */
+ while (--len)
+ {
+ result <<= 8;
+ result |= (unsigned char) (*++s);
+ }
+ }
+ else
+ {
+ /*
+ * octal: consume digits until a non-digit (normally the trailing
+ * space or NUL) or the end of the field is reached.
+ */
+ while (len-- && *s >= '0' && *s <= '7')
+ {
+ result <<= 3;
+ result |= (*s - '0');
+ s++;
+ }
+ }
+ return result;
+}
+
+
+/*
+ * Calculate the tar checksum for a header. The header is assumed to always
+ * be 512 bytes, per the tar standard.
+ *
+ * Returns the computed sum. The header is only read, never modified, and
+ * whatever is currently stored in its checksum field (bytes 148 to 155)
+ * has no influence on the result.
+ */
+int
+tarChecksum(char *header)
+{
+ int i,
+ sum;
+
+ /*
+ * Per POSIX, the checksum is the simple sum of all bytes in the header,
+ * treating the bytes as unsigned, and treating the checksum field (at
+ * offset 148) as though it contained 8 spaces.
+ *
+ * The 0xFF mask is what makes each byte count as unsigned even where
+ * plain char is a signed type.
+ */
+ sum = 8 * ' '; /* presumed value for checksum field */
+ for (i = 0; i < 512; i++)
+ if (i < 148 || i >= 156)
+ sum += 0xFF & header[i];
+ return sum;
+}
+
+
+/*
+ * Fill in the buffer pointed to by h with a tar format header. This buffer
+ * must always have space for 512 characters, which is a requirement of
+ * the tar format.
+ *
+ * filename goes into the 100-byte name field, truncated to 99 bytes; when
+ * linktarget is not NULL or mode denotes a directory, a trailing slash is
+ * appended. A non-NULL linktarget makes the entry a symbolic link, with the
+ * target stored truncated to 99 bytes. size is recorded only for regular
+ * files; links and directories are written with size zero. Only the
+ * permission bits of mode (mode & 07777) go into the mode field, while the
+ * entry type flag is derived from linktarget and S_ISDIR(mode). uid, gid
+ * and mtime are stored as given. The user and group name fields are always
+ * set to postgres.
+ *
+ * "bogus" says to write the incorrect format marker that was emitted by
+ * pre-9.3 pg_dump. This is to avoid breaking compatibility unnecessarily
+ * with old copies of pg_restore.
+ */
+void
+tarCreateHeader(char *h, const char *filename, const char *linktarget,
+ pgoff_t size, mode_t mode, uid_t uid, gid_t gid, time_t mtime,
+ bool bogus)
+{
+ memset(h, 0, 512); /* assume tar header size */
+
+ /* Name 100 */
+ strlcpy(&h[0], filename, 100);
+ if (linktarget != NULL || S_ISDIR(mode))
+ {
+ /*
+ * We only support symbolic links to directories, and this is
+ * indicated in the tar format by adding a slash at the end of the
+ * name, the same as for regular directories.
+ *
+ * When the name already fills 99 bytes, the slash lands in h[99] and
+ * the NUL written at h[flen + 1] falls on the first byte of the mode
+ * field, which is filled in just below.
+ */
+ int flen = strlen(filename);
+
+ flen = Min(flen, 99);
+ h[flen] = '/';
+ h[flen + 1] = '\0';
+ }
+
+ /* Mode 8 - this doesn't include the file type bits (S_IFMT) */
+ print_tar_number(&h[100], 8, (mode & 07777));
+
+ /* User ID 8 */
+ print_tar_number(&h[108], 8, uid);
+
+ /* Group 8 */
+ print_tar_number(&h[116], 8, gid);
+
+ /* File size 12 */
+ if (linktarget != NULL || S_ISDIR(mode))
+ /* Symbolic link or directory has size zero */
+ print_tar_number(&h[124], 12, 0);
+ else
+ print_tar_number(&h[124], 12, size);
+
+ /* Mod Time 12 */
+ print_tar_number(&h[136], 12, mtime);
+
+ /* Checksum 8 cannot be calculated until we've filled all other fields */
+
+ if (linktarget != NULL)
+ {
+ /* Type - Symbolic link */
+ h[156] = '2';
+ /* Link Name 100 */
+ strlcpy(&h[157], linktarget, 100);
+ }
+ else if (S_ISDIR(mode))
+ {
+ /* Type - directory */
+ h[156] = '5';
+ }
+ else
+ {
+ /* Type - regular file */
+ h[156] = '0';
+ }
+
+ if (bogus)
+ {
+ /* somebody's incorrect interpretation of Magic 6 + Version 2 */
+ sprintf(&h[257], "ustar00");
+ }
+ else
+ {
+ /* Magic 6 - five letters plus the terminating NUL from strcpy */
+ strcpy(&h[257], "ustar");
+
+ /* Version 2 - exactly two digits, copied without a terminator */
+ memcpy(&h[263], "00", 2);
+ }
+
+ /* User 32 */
+ /* XXX: Do we need to care about setting correct username? */
+ strlcpy(&h[265], "postgres", 32);
+
+ /* Group 32 */
+ /* XXX: Do we need to care about setting correct group name? */
+ strlcpy(&h[297], "postgres", 32);
+
+ /* Major Dev 8 */
+ print_tar_number(&h[329], 8, 0);
+
+ /* Minor Dev 8 */
+ print_tar_number(&h[337], 8, 0);
+
+ /* Prefix 155 - not used, leave as nulls */
+
+ /*
+ * Finally, compute and insert the checksum. This has to come last so
+ * that the sum covers every other field as actually written.
+ */
+ print_tar_number(&h[148], 8, tarChecksum(h));
+}