From: Tom Lane Date: Fri, 28 Sep 2012 19:19:15 +0000 (-0400) Subject: Fix tar files emitted by pg_dump and pg_basebackup to be POSIX conformant. X-Git-Tag: REL9_3_BETA1~873 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=05b555d12bc2ad0d581f48a12b45174db41dc10d;p=postgresql Fix tar files emitted by pg_dump and pg_basebackup to be POSIX conformant. Both programs got the "magic" string wrong, causing standard-conforming tar implementations to believe the output was just legacy tar format without any POSIX extensions. This doesn't actually matter that much, especially since pg_dump failed to fill the POSIX fields anyway, but still there is little point in emitting tar format if we can't be compliant with the standard. In addition, pg_dump failed to write the EOF marker correctly (there should be 2 blocks of zeroes not just one), pg_basebackup put the numeric group ID in the wrong place, and both programs had a pretty brain-dead idea of how to compute the checksum. Fix all that and improve the comments a bit. pg_restore is modified to accept either the correct POSIX-compliant "magic" string or the previous value. This part of the change will need to be back-patched to avoid an unnecessary compatibility break when a previous version tries to read tar-format output from 9.3 pg_dump. Brian Weaver and Tom Lane --- diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index e72556303a..3d72a162eb 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -1759,9 +1759,11 @@ The commands accepted in walsender mode are: After the second regular result set, one or more CopyResponse results will be sent, one for PGDATA and one for each additional tablespace other than pg_default and pg_global. The data in - the CopyResponse results will be a tar format (using ustar00 - extensions) dump of the tablespace contents. After the tar data is - complete, a final ordinary result set will be sent. 
+ the CopyResponse results will be a tar format (following the + ustar interchange format specified in the POSIX 1003.1-2008 + standard) dump of the tablespace contents, except that the two trailing + blocks of zeroes specified in the standard are omitted. + After the tar data is complete, a final ordinary result set will be sent. diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 4aaa9e3d08..4636e8d1c6 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -568,7 +568,7 @@ sendFileWithContent(const char *filename, const char *content) /* * Include all files from the given directory in the output tar stream. If - * 'sizeonly' is true, we just calculate a total length and return ig, without + * 'sizeonly' is true, we just calculate a total length and return it, without * actually sending anything. */ static int64 @@ -763,11 +763,16 @@ _tarChecksum(char *header) int i, sum; - sum = 0; + /* + * Per POSIX, the checksum is the simple sum of all bytes in the header, + * treating the bytes as unsigned, and treating the checksum field (at + * offset 148) as though it contained 8 spaces. + */ + sum = 8 * ' '; /* presumed value for checksum field */ for (i = 0; i < 512; i++) if (i < 148 || i >= 156) sum += 0xFF & header[i]; - return sum + 256; /* Assume 8 blanks in checksum field */ + return sum; } /* Given the member, write the TAR header & send the file */ @@ -846,9 +851,13 @@ _tarWriteHeader(const char *filename, const char *linktarget, struct stat * statbuf) { char h[512]; - int lastSum = 0; - int sum; + /* + * Note: most of the fields in a tar header are not supposed to be + * null-terminated. We use sprintf, which will write a null after the + * required bytes; that null goes into the first byte of the next field. + * This is okay as long as we fill the fields in order. 
+ */ memset(h, 0, sizeof(h)); /* Name 100 */ @@ -860,8 +869,11 @@ _tarWriteHeader(const char *filename, const char *linktarget, * indicated in the tar format by adding a slash at the end of the * name, the same as for regular directories. */ - h[strlen(filename)] = '/'; - h[strlen(filename) + 1] = '\0'; + int flen = strlen(filename); + + flen = Min(flen, 99); + h[flen] = '/'; + h[flen + 1] = '\0'; } /* Mode 8 */ @@ -871,9 +883,9 @@ _tarWriteHeader(const char *filename, const char *linktarget, sprintf(&h[108], "%07o ", statbuf->st_uid); /* Group 8 */ - sprintf(&h[117], "%07o ", statbuf->st_gid); + sprintf(&h[116], "%07o ", statbuf->st_gid); - /* File size 12 - 11 digits, 1 space, no NUL */ + /* File size 12 - 11 digits, 1 space; use print_val for 64 bit support */ if (linktarget != NULL || S_ISDIR(statbuf->st_mode)) /* Symbolic link or directory has size zero */ print_val(&h[124], 0, 8, 11); @@ -884,13 +896,13 @@ _tarWriteHeader(const char *filename, const char *linktarget, /* Mod Time 12 */ sprintf(&h[136], "%011o ", (int) statbuf->st_mtime); - /* Checksum 8 */ - sprintf(&h[148], "%06o ", lastSum); + /* Checksum 8 cannot be calculated until we've filled all other fields */ if (linktarget != NULL) { /* Type - Symbolic link */ sprintf(&h[156], "2"); + /* Link Name 100 */ sprintf(&h[157], "%.99s", linktarget); } else if (S_ISDIR(statbuf->st_mode)) @@ -900,10 +912,11 @@ _tarWriteHeader(const char *filename, const char *linktarget, /* Type - regular file */ sprintf(&h[156], "0"); - /* Link tag 100 (NULL) */ + /* Magic 6 */ + sprintf(&h[257], "ustar"); - /* Magic 6 + Version 2 */ - sprintf(&h[257], "ustar00"); + /* Version 2 */ + sprintf(&h[263], "00"); /* User 32 */ /* XXX: Do we need to care about setting correct username? */ @@ -913,17 +926,21 @@ _tarWriteHeader(const char *filename, const char *linktarget, /* XXX: Do we need to care about setting correct group name? 
*/ sprintf(&h[297], "%.31s", "postgres"); - /* Maj Dev 8 */ - sprintf(&h[329], "%6o ", 0); + /* Major Dev 8 */ + sprintf(&h[329], "%07o ", 0); - /* Min Dev 8 */ - sprintf(&h[337], "%6o ", 0); + /* Minor Dev 8 */ + sprintf(&h[337], "%07o ", 0); - while ((sum = _tarChecksum(h)) != lastSum) - { - sprintf(&h[148], "%06o ", sum); - lastSum = sum; - } + /* Prefix 155 - not used, leave as nulls */ + + /* + * We mustn't overwrite the next field while inserting the checksum. + * Fortunately, the checksum can't exceed 6 octal digits, so we just write + * 6 digits, a space, and a null, which is legal per POSIX. + */ + sprintf(&h[148], "%06o ", _tarChecksum(h)); + /* Now send the completed header. */ pq_putmessage('d', h, 512); } diff --git a/src/bin/pg_dump/pg_backup_tar.c b/src/bin/pg_dump/pg_backup_tar.c index ced5c13321..34d2c1a0df 100644 --- a/src/bin/pg_dump/pg_backup_tar.c +++ b/src/bin/pg_dump/pg_backup_tar.c @@ -882,8 +882,10 @@ _CloseArchive(ArchiveHandle *AH) tarClose(AH, th); - /* Add a block of NULLs since it's de-rigeur. */ - for (i = 0; i < 512; i++) + /* + * EOF marker for tar files is two blocks of NULLs. + */ + for (i = 0; i < 512 * 2; i++) { if (fputc(0, ctx->tarFH) == EOF) exit_horribly(modulename, @@ -1032,11 +1034,16 @@ _tarChecksum(char *header) int i, sum; - sum = 0; + /* + * Per POSIX, the checksum is the simple sum of all bytes in the header, + * treating the bytes as unsigned, and treating the checksum field (at + * offset 148) as though it contained 8 spaces. 
+ */ + sum = 8 * ' '; /* presumed value for checksum field */ for (i = 0; i < 512; i++) if (i < 148 || i >= 156) sum += 0xFF & header[i]; - return sum + 256; /* Assume 8 blanks in checksum field */ + return sum; } bool @@ -1050,11 +1057,15 @@ isValidTarHeader(char *header) if (sum != chk) return false; - /* POSIX format */ - if (strncmp(&header[257], "ustar00", 7) == 0) + /* POSIX tar format */ + if (memcmp(&header[257], "ustar\0", 6) == 0 && + memcmp(&header[263], "00", 2) == 0) return true; - /* older format */ - if (strncmp(&header[257], "ustar  ", 7) == 0) + /* GNU tar format */ + if (memcmp(&header[257], "ustar  \0", 8) == 0) + return true; + /* not-quite-POSIX format written by pre-9.3 pg_dump */ + if (memcmp(&header[257], "ustar00\0", 8) == 0) return true; return false; @@ -1329,63 +1340,71 @@ static void _tarWriteHeader(TAR_MEMBER *th) { char h[512]; - int lastSum = 0; - int sum; + /* + * Note: most of the fields in a tar header are not supposed to be + * null-terminated. We use sprintf, which will write a null after the + * required bytes; that null goes into the first byte of the next field. + * This is okay as long as we fill the fields in order. 
+ */ memset(h, 0, sizeof(h)); /* Name 100 */ sprintf(&h[0], "%.99s", th->targetFile); /* Mode 8 */ - sprintf(&h[100], "100600 "); + sprintf(&h[100], "0000600 "); /* User ID 8 */ - sprintf(&h[108], "004000 "); + sprintf(&h[108], "0004000 "); /* Group 8 */ - sprintf(&h[116], "002000 "); + sprintf(&h[116], "0002000 "); - /* File size 12 - 11 digits, 1 space, no NUL */ + /* File size 12 - 11 digits, 1 space; use print_val for 64 bit support */ print_val(&h[124], th->fileLen, 8, 11); sprintf(&h[135], " "); /* Mod Time 12 */ sprintf(&h[136], "%011o ", (int) time(NULL)); - /* Checksum 8 */ - sprintf(&h[148], "%06o ", lastSum); + /* Checksum 8 cannot be calculated until we've filled all other fields */ /* Type - regular file */ sprintf(&h[156], "0"); - /* Link tag 100 (NULL) */ + /* Link Name 100 (leave as nulls) */ - /* Magic 6 + Version 2 */ - sprintf(&h[257], "ustar00"); + /* Magic 6 */ + sprintf(&h[257], "ustar"); + + /* Version 2 */ + sprintf(&h[263], "00"); -#if 0 /* User 32 */ - sprintf(&h[265], "%.31s", ""); /* How do I get username reliably? Do - * I need to? */ + /* XXX: Do we need to care about setting correct username? */ + sprintf(&h[265], "%.31s", "postgres"); /* Group 32 */ - sprintf(&h[297], "%.31s", ""); /* How do I get group reliably? Do I - * need to? */ + /* XXX: Do we need to care about setting correct group name? */ + sprintf(&h[297], "%.31s", "postgres"); - /* Maj Dev 8 */ - sprintf(&h[329], "%6o ", 0); + /* Major Dev 8 */ + sprintf(&h[329], "%07o ", 0); - /* Min Dev 8 */ - sprintf(&h[337], "%6o ", 0); -#endif + /* Minor Dev 8 */ + sprintf(&h[337], "%07o ", 0); - while ((sum = _tarChecksum(h)) != lastSum) - { - sprintf(&h[148], "%06o ", sum); - lastSum = sum; - } + /* Prefix 155 - not used, leave as nulls */ + + /* + * We mustn't overwrite the next field while inserting the checksum. + * Fortunately, the checksum can't exceed 6 octal digits, so we just write + * 6 digits, a space, and a null, which is legal per POSIX. 
+ */ + sprintf(&h[148], "%06o ", _tarChecksum(h)); + /* Now write the completed header. */ if (fwrite(h, 1, 512, th->tarFH) != 512) exit_horribly(modulename, "could not write to output file: %s\n", strerror(errno)); } diff --git a/src/bin/pg_dump/pg_backup_tar.h b/src/bin/pg_dump/pg_backup_tar.h index cb9be645af..0277f08f07 100644 --- a/src/bin/pg_dump/pg_backup_tar.h +++ b/src/bin/pg_dump/pg_backup_tar.h @@ -1,28 +1,31 @@ /* * src/bin/pg_dump/pg_backup_tar.h * - * TAR Header + * TAR Header (see "ustar interchange format" in POSIX 1003.1) * * Offset Length Contents * 0 100 bytes File name ('\0' terminated, 99 maximum length) * 100 8 bytes File mode (in octal ascii) * 108 8 bytes User ID (in octal ascii) * 116 8 bytes Group ID (in octal ascii) - * 124 12 bytes File size (s) (in octal ascii) - * 136 12 bytes Modify time (in octal ascii) + * 124 12 bytes File size (in octal ascii) + * 136 12 bytes Modify time (Unix timestamp in octal ascii) * 148 8 bytes Header checksum (in octal ascii) - * 156 1 bytes Link flag - * 157 100 bytes Linkname ('\0' terminated, 99 maximum length) - * 257 8 bytes Magic ("ustar \0") + * 156 1 bytes Type flag (see below) + * 157 100 bytes Linkname, if symlink ('\0' terminated, 99 maximum length) + * 257 6 bytes Magic ("ustar\0") + * 263 2 bytes Version ("00") * 265 32 bytes User name ('\0' terminated, 31 maximum length) * 297 32 bytes Group name ('\0' terminated, 31 maximum length) * 329 8 bytes Major device ID (in octal ascii) * 337 8 bytes Minor device ID (in octal ascii) - * 345 167 bytes Padding - * 512 (s+p)bytes File contents (s+p) := (((s) + 511) & ~511), round up to 512 bytes + * 345 155 bytes File name prefix (not used in our implementation) + * 500 12 bytes Padding + * + * 512 (s+p)bytes File contents, padded out to 512-byte boundary */ -/* The linkflag defines the type of file */ +/* The type flag defines the type of file */ #define LF_OLDNORMAL '\0' /* Normal disk file, Unix compatible */ #define LF_NORMAL '0' /* Normal disk file 
*/ #define LF_LINK '1' /* Link to previously dumped file */