From 900d77fa57565ac5881acf464e76a88069605ad2 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 28 Sep 2012 15:35:46 -0400 Subject: [PATCH] Fix tar files emitted by pg_basebackup to be POSIX conformant. Back-patch portions of commit 05b555d12bc2ad0d581f48a12b45174db41dc10d. There doesn't seem to be any reason not to fix pg_basebackup fully, but we can't change pg_dump's "magic" string without breaking older versions of pg_restore. Instead, just patch pg_restore to accept either version of the magic string, in hopes of avoiding compatibility problems when 9.3 comes out. I also fixed pg_dump to write the correct 2-block EOF marker, since that won't create a compatibility problem with pg_restore and it could help with some versions of tar. Brian Weaver and Tom Lane --- doc/src/sgml/protocol.sgml | 8 ++-- src/backend/replication/basebackup.c | 63 ++++++++++++++++++---------- src/bin/pg_dump/pg_backup_tar.c | 27 ++++++++---- 3 files changed, 64 insertions(+), 34 deletions(-) diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index e72556303a..3d72a162eb 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -1759,9 +1759,11 @@ The commands accepted in walsender mode are: After the second regular result set, one or more CopyResponse results will be sent, one for PGDATA and one for each additional tablespace other than pg_default and pg_global. The data in - the CopyResponse results will be a tar format (using ustar00 - extensions) dump of the tablespace contents. After the tar data is - complete, a final ordinary result set will be sent. + the CopyResponse results will be a tar format (following the + ustar interchange format specified in the POSIX 1003.1-2008 + standard) dump of the tablespace contents, except that the two trailing + blocks of zeroes specified in the standard are omitted. + After the tar data is complete, a final ordinary result set will be sent. diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 0bc88a4040..1b234c64c9 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -572,7 +572,7 @@ sendFileWithContent(const char *filename, const char *content) /* * Include all files from the given directory in the output tar stream. If - * 'sizeonly' is true, we just calculate a total length and return ig, without + * 'sizeonly' is true, we just calculate a total length and return it, without * actually sending anything. */ static int64 @@ -767,11 +767,16 @@ _tarChecksum(char *header) int i, sum; - sum = 0; + /* + * Per POSIX, the checksum is the simple sum of all bytes in the header, + * treating the bytes as unsigned, and treating the checksum field (at + * offset 148) as though it contained 8 spaces. + */ + sum = 8 * ' '; /* presumed value for checksum field */ for (i = 0; i < 512; i++) if (i < 148 || i >= 156) sum += 0xFF & header[i]; - return sum + 256; /* Assume 8 blanks in checksum field */ + return sum; } /* Given the member, write the TAR header & send the file */ @@ -850,9 +855,13 @@ _tarWriteHeader(const char *filename, const char *linktarget, struct stat * statbuf) { char h[512]; - int lastSum = 0; - int sum; + /* + * Note: most of the fields in a tar header are not supposed to be + * null-terminated. We use sprintf, which will write a null after the + * required bytes; that null goes into the first byte of the next field. + * This is okay as long as we fill the fields in order. + */ memset(h, 0, sizeof(h)); /* Name 100 */ @@ -864,8 +873,11 @@ _tarWriteHeader(const char *filename, const char *linktarget, * indicated in the tar format by adding a slash at the end of the * name, the same as for regular directories. */ - h[strlen(filename)] = '/'; - h[strlen(filename) + 1] = '\0'; + int flen = strlen(filename); + + flen = Min(flen, 99); + h[flen] = '/'; + h[flen + 1] = '\0'; } /* Mode 8 */ @@ -875,9 +887,9 @@ _tarWriteHeader(const char *filename, const char *linktarget, sprintf(&h[108], "%07o ", statbuf->st_uid); /* Group 8 */ - sprintf(&h[117], "%07o ", statbuf->st_gid); + sprintf(&h[116], "%07o ", statbuf->st_gid); - /* File size 12 - 11 digits, 1 space, no NUL */ + /* File size 12 - 11 digits, 1 space; use print_val for 64 bit support */ if (linktarget != NULL || S_ISDIR(statbuf->st_mode)) /* Symbolic link or directory has size zero */ print_val(&h[124], 0, 8, 11); @@ -888,13 +900,13 @@ _tarWriteHeader(const char *filename, const char *linktarget, /* Mod Time 12 */ sprintf(&h[136], "%011o ", (int) statbuf->st_mtime); - /* Checksum 8 */ - sprintf(&h[148], "%06o ", lastSum); + /* Checksum 8 cannot be calculated until we've filled all other fields */ if (linktarget != NULL) { /* Type - Symbolic link */ sprintf(&h[156], "2"); + /* Link Name 100 */ sprintf(&h[157], "%.99s", linktarget); } else if (S_ISDIR(statbuf->st_mode)) @@ -904,10 +916,11 @@ _tarWriteHeader(const char *filename, const char *linktarget, /* Type - regular file */ sprintf(&h[156], "0"); - /* Link tag 100 (NULL) */ + /* Magic 6 */ + sprintf(&h[257], "ustar"); - /* Magic 6 + Version 2 */ - sprintf(&h[257], "ustar00"); + /* Version 2 */ + sprintf(&h[263], "00"); /* User 32 */ /* XXX: Do we need to care about setting correct username? */ @@ -917,17 +930,21 @@ _tarWriteHeader(const char *filename, const char *linktarget, /* XXX: Do we need to care about setting correct group name? */ sprintf(&h[297], "%.31s", "postgres"); - /* Maj Dev 8 */ - sprintf(&h[329], "%6o ", 0); + /* Major Dev 8 */ + sprintf(&h[329], "%07o ", 0); - /* Min Dev 8 */ - sprintf(&h[337], "%6o ", 0); + /* Minor Dev 8 */ + sprintf(&h[337], "%07o ", 0); - while ((sum = _tarChecksum(h)) != lastSum) - { - sprintf(&h[148], "%06o ", sum); - lastSum = sum; - } + /* Prefix 155 - not used, leave as nulls */ + + /* + * We mustn't overwrite the next field while inserting the checksum. + * Fortunately, the checksum can't exceed 6 octal digits, so we just write + * 6 digits, a space, and a null, which is legal per POSIX. + */ + sprintf(&h[148], "%06o ", _tarChecksum(h)); + /* Now send the completed header. */ pq_putmessage('d', h, 512); } diff --git a/src/bin/pg_dump/pg_backup_tar.c b/src/bin/pg_dump/pg_backup_tar.c index ced5c13321..7da978fd97 100644 --- a/src/bin/pg_dump/pg_backup_tar.c +++ b/src/bin/pg_dump/pg_backup_tar.c @@ -882,8 +882,10 @@ _CloseArchive(ArchiveHandle *AH) tarClose(AH, th); - /* Add a block of NULLs since it's de-rigeur. */ - for (i = 0; i < 512; i++) + /* + * EOF marker for tar files is two blocks of NULLs. + */ + for (i = 0; i < 512 * 2; i++) { if (fputc(0, ctx->tarFH) == EOF) exit_horribly(modulename, @@ -1032,11 +1034,16 @@ _tarChecksum(char *header) int i, sum; - sum = 0; + /* + * Per POSIX, the checksum is the simple sum of all bytes in the header, + * treating the bytes as unsigned, and treating the checksum field (at + * offset 148) as though it contained 8 spaces. + */ + sum = 8 * ' '; /* presumed value for checksum field */ for (i = 0; i < 512; i++) if (i < 148 || i >= 156) sum += 0xFF & header[i]; - return sum + 256; /* Assume 8 blanks in checksum field */ + return sum; } bool @@ -1050,11 +1057,15 @@ isValidTarHeader(char *header) if (sum != chk) return false; - /* POSIX format */ - if (strncmp(&header[257], "ustar00", 7) == 0) + /* POSIX tar format */ + if (memcmp(&header[257], "ustar\0", 6) == 0 && + memcmp(&header[263], "00", 2) == 0) + return true; + /* GNU tar format */ + if (memcmp(&header[257], "ustar \0", 8) == 0) return true; - /* older format */ - if (strncmp(&header[257], "ustar ", 7) == 0) + /* not-quite-POSIX format written by pre-9.3 pg_dump */ + if (memcmp(&header[257], "ustar00\0", 8) == 0) return true; return false; -- 2.40.0