]> granicus.if.org Git - postgresql/commitdiff
Address set of issues with errno handling
author Michael Paquier <michael@paquier.xyz>
Mon, 25 Jun 2018 02:20:50 +0000 (11:20 +0900)
committer Michael Paquier <michael@paquier.xyz>
Mon, 25 Jun 2018 02:20:50 +0000 (11:20 +0900)
System calls mixed up in error code paths are causing two issues which
several code paths have not correctly handled:
1) For write() calls, the system may sometimes write fewer bytes than
requested without setting errno.  Some paths were careful enough to
consider that case and assumed that errno should be set to ENOSPC;
other calls missed that.
2) errno generated by a system call is overwritten by other system calls
which may succeed once an error code path is taken, causing what is
reported to the user to be incorrect.

This patch uses the brute-force approach of correcting all those code
paths.  Some refactoring could happen in the future, but this is left as
future work, which is not targeted for back-branches anyway.

Author: Michael Paquier
Reviewed-by: Ashutosh Sharma
Discussion: https://postgr.es/m/20180622061535.GD5215@paquier.xyz

src/backend/access/heap/rewriteheap.c
src/backend/access/transam/twophase.c
src/backend/access/transam/xlog.c
src/backend/access/transam/xlogutils.c
src/backend/replication/basebackup.c
src/backend/replication/logical/origin.c
src/backend/replication/logical/reorderbuffer.c
src/backend/replication/logical/snapbuild.c
src/backend/replication/slot.c
src/bin/pg_basebackup/receivelog.c

index 170e77dd84f5cfa6a0dccafe65d1be005b8c5767..e628c9bd89d74a33923f3a3311285a381ce15909 100644 (file)
@@ -1163,9 +1163,14 @@ heap_xlog_logical_rewrite(XLogReaderState *r)
 
        /* write out tail end of mapping file (again) */
        if (write(fd, data, len) != len)
+       {
+               /* if write didn't set errno, assume problem is no disk space */
+               if (errno == 0)
+                       errno = ENOSPC;
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not write to file \"%s\": %m", path)));
+       }
 
        /*
         * Now fsync all previously written data. We could improve things and only
index cdd0091a5f0e3685dc8218d21435e9848674d670..c7cde25f835ca62fd43f77b892c6ac7e5991338d 100644 (file)
@@ -1171,12 +1171,17 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
         */
        if (fstat(fd, &stat))
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(fd);
                if (give_warnings)
+               {
+                       errno = save_errno;
                        ereport(WARNING,
                                        (errcode_for_file_access(),
                                         errmsg("could not stat two-phase state file \"%s\": %m",
                                                        path)));
+               }
                return NULL;
        }
 
@@ -1203,12 +1208,17 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 
        if (read(fd, buf, stat.st_size) != stat.st_size)
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(fd);
                if (give_warnings)
+               {
+                       errno = save_errno;
                        ereport(WARNING,
                                        (errcode_for_file_access(),
                                         errmsg("could not read two-phase state file \"%s\": %m",
                                                        path)));
+               }
                pfree(buf);
                return NULL;
        }
@@ -1550,14 +1560,24 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
        /* Write content and CRC */
        if (write(fd, content, len) != len)
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(fd);
+
+               /* if write didn't set errno, assume problem is no disk space */
+               errno = save_errno ? save_errno : ENOSPC;
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not write two-phase state file: %m")));
        }
        if (write(fd, &statefile_crc, sizeof(pg_crc32c)) != sizeof(pg_crc32c))
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(fd);
+
+               /* if write didn't set errno, assume problem is no disk space */
+               errno = save_errno ? save_errno : ENOSPC;
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not write two-phase state file: %m")));
@@ -1569,7 +1589,10 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
         */
        if (pg_fsync(fd) != 0)
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(fd);
+               errno = save_errno;
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not fsync two-phase state file: %m")));
index 98abc4961f0c851b967a9b72e2af9bc4f4113b1c..248ea9a9766a12049d2814764295fa221893c8b0 100644 (file)
@@ -3087,7 +3087,10 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
 
        if (pg_fsync(fd) != 0)
        {
+               int                     save_errno = errno;
+
                close(fd);
+               errno = save_errno;
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not fsync file \"%s\": %m", tmppath)));
@@ -11255,8 +11258,10 @@ retry:
        if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
        {
                char            fname[MAXFNAMELEN];
+               int                     save_errno = errno;
 
                XLogFileName(fname, curFileTLI, readSegNo);
+               errno = save_errno;
                ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
                                (errcode_for_file_access(),
                                 errmsg("could not seek in log segment %s to offset %u: %m",
@@ -11267,8 +11272,10 @@ retry:
        if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
        {
                char            fname[MAXFNAMELEN];
+               int                     save_errno = errno;
 
                XLogFileName(fname, curFileTLI, readSegNo);
+               errno = save_errno;
                ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
                                (errcode_for_file_access(),
                                 errmsg("could not read from log segment %s, offset %u: %m",
index 51a8e8ddb2ec1fb3d42a4497ea80df3b32ba4bd8..a82ccd4569751a0520bc37f1a44372dda0b8ca85 100644 (file)
@@ -710,9 +710,11 @@ XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count)
                        if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0)
                        {
                                char            path[MAXPGPATH];
+                               int                     save_errno = errno;
 
                                XLogFilePath(path, tli, sendSegNo);
 
+                               errno = save_errno;
                                ereport(ERROR,
                                                (errcode_for_file_access(),
                                  errmsg("could not seek in log segment %s to offset %u: %m",
@@ -731,9 +733,11 @@ XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count)
                if (readbytes <= 0)
                {
                        char            path[MAXPGPATH];
+                       int                     save_errno = errno;
 
                        XLogFilePath(path, tli, sendSegNo);
 
+                       errno = save_errno;
                        ereport(ERROR,
                                        (errcode_for_file_access(),
                                         errmsg("could not read from log segment %s, offset %u, length %lu: %m",
index 53be5d55e49ca4ea8c30eddc4031a94953335b42..73a56cbf16eda2e86ff6a97f6b43d4a58201ad12 100644 (file)
@@ -392,6 +392,8 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
                        fp = AllocateFile(pathbuf, "rb");
                        if (fp == NULL)
                        {
+                               int                     save_errno = errno;
+
                                /*
                                 * Most likely reason for this is that the file was already
                                 * removed by a checkpoint, so check for that to get a better
@@ -399,6 +401,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
                                 */
                                CheckXLogRemoved(segno, tli);
 
+                               errno = save_errno;
                                ereport(ERROR,
                                                (errcode_for_file_access(),
                                                 errmsg("could not open file \"%s\": %m", pathbuf)));
index 4bd754df9451093fd4258609e830eb01c2779ca7..c281734fd591259202997cd82c0d9da324e91922 100644 (file)
@@ -549,7 +549,12 @@ CheckPointReplicationOrigin(void)
        /* write magic */
        if ((write(tmpfd, &magic, sizeof(magic))) != sizeof(magic))
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(tmpfd);
+
+               /* if write didn't set errno, assume problem is no disk space */
+               errno = save_errno ? save_errno : ENOSPC;
                ereport(PANIC,
                                (errcode_for_file_access(),
                                 errmsg("could not write to file \"%s\": %m",
@@ -588,7 +593,12 @@ CheckPointReplicationOrigin(void)
                if ((write(tmpfd, &disk_state, sizeof(disk_state))) !=
                        sizeof(disk_state))
                {
+                       int                     save_errno = errno;
+
                        CloseTransientFile(tmpfd);
+
+                       /* if write didn't set errno, assume problem is no disk space */
+                       errno = save_errno ? save_errno : ENOSPC;
                        ereport(PANIC,
                                        (errcode_for_file_access(),
                                         errmsg("could not write to file \"%s\": %m",
@@ -604,7 +614,12 @@ CheckPointReplicationOrigin(void)
        FIN_CRC32C(crc);
        if ((write(tmpfd, &crc, sizeof(crc))) != sizeof(crc))
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(tmpfd);
+
+               /* if write didn't set errno, assume problem is no disk space */
+               errno = save_errno ? save_errno : ENOSPC;
                ereport(PANIC,
                                (errcode_for_file_access(),
                                 errmsg("could not write to file \"%s\": %m",
index 93f373c9efdfe6be8d4dac8ad183a65e57dd4d0a..6ed5fc405bde159141a774b677404909792c90f3 100644 (file)
@@ -2352,7 +2352,9 @@ ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
                int                     save_errno = errno;
 
                CloseTransientFile(fd);
-               errno = save_errno;
+
+               /* if write didn't set errno, assume problem is no disk space */
+               errno = save_errno ? save_errno : ENOSPC;
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not write to data file for XID %u: %m",
index 901e95ede4f6aa70f0c852f6572f8d43ec03f283..a266876598457cc7ff6eac909c4eceb870fa8773 100644 (file)
@@ -1583,7 +1583,12 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
 
        if ((write(fd, ondisk, needed_length)) != needed_length)
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(fd);
+
+               /* if write didn't set errno, assume problem is no disk space */
+               errno = save_errno ? save_errno : ENOSPC;
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not write to file \"%s\": %m", tmppath)));
@@ -1599,7 +1604,10 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
         */
        if (pg_fsync(fd) != 0)
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(fd);
+               errno = save_errno;
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not fsync file \"%s\": %m", tmppath)));
@@ -1681,7 +1689,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
        readBytes = read(fd, &ondisk, SnapBuildOnDiskConstantSize);
        if (readBytes != SnapBuildOnDiskConstantSize)
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(fd);
+               errno = save_errno;
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not read file \"%s\", read %d of %d: %m",
@@ -1707,7 +1718,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
        readBytes = read(fd, &ondisk.builder, sizeof(SnapBuild));
        if (readBytes != sizeof(SnapBuild))
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(fd);
+               errno = save_errno;
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not read file \"%s\", read %d of %d: %m",
@@ -1722,7 +1736,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
        readBytes = read(fd, ondisk.builder.was_running.was_xip, sz);
        if (readBytes != sz)
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(fd);
+               errno = save_errno;
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not read file \"%s\", read %d of %d: %m",
@@ -1736,7 +1753,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
        readBytes = read(fd, ondisk.builder.committed.xip, sz);
        if (readBytes != sz)
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(fd);
+               errno = save_errno;
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not read file \"%s\", read %d of %d: %m",
index ca0b8c65c6a4219ec96748722a222ed18c23d9b3..59b0d846226689d203aee8613c6077a0eca40c39 100644 (file)
@@ -1085,7 +1085,9 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
                int                     save_errno = errno;
 
                CloseTransientFile(fd);
-               errno = save_errno;
+
+               /* if write didn't set errno, assume problem is no disk space */
+               errno = save_errno ? save_errno : ENOSPC;
                ereport(elevel,
                                (errcode_for_file_access(),
                                 errmsg("could not write to file \"%s\": %m",
@@ -1184,7 +1186,10 @@ RestoreSlotFromDisk(const char *name)
         */
        if (pg_fsync(fd) != 0)
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(fd);
+               errno = save_errno;
                ereport(PANIC,
                                (errcode_for_file_access(),
                                 errmsg("could not fsync file \"%s\": %m",
index 3a921ebf2db03f7a19df3ede7854c0d67c618188..c9fb4198b07f1869c115b3d4db2ae7d4012c3dfc 100644 (file)
@@ -149,6 +149,9 @@ open_walfile(StreamCtl *stream, XLogRecPtr startpoint)
        {
                if (write(f, zerobuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
                {
+                       /* if write didn't set errno, assume problem is no disk space */
+                       if (errno == 0)
+                               errno = ENOSPC;
                        fprintf(stderr,
                                        _("%s: could not pad transaction log file \"%s\": %s\n"),
                                        progname, fn, strerror(errno));
@@ -334,7 +337,9 @@ writeTimeLineHistoryFile(StreamCtl *stream, char *filename, char *content)
                 */
                close(fd);
                unlink(tmppath);
-               errno = save_errno;
+
+               /* if write didn't set errno, assume problem is no disk space */
+               errno = save_errno ? save_errno : ENOSPC;
 
                fprintf(stderr, _("%s: could not write timeline history file \"%s\": %s\n"),
                                progname, tmppath, strerror(errno));
@@ -343,7 +348,10 @@ writeTimeLineHistoryFile(StreamCtl *stream, char *filename, char *content)
 
        if (fsync(fd) != 0)
        {
+               int                     save_errno = errno;
+
                close(fd);
+               errno = save_errno;
                fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
                                progname, tmppath, strerror(errno));
                return false;
@@ -1185,6 +1193,9 @@ ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len,
                                  copybuf + hdr_len + bytes_written,
                                  bytes_to_write) != bytes_to_write)
                {
+                       /* if write didn't set errno, assume problem is no disk space */
+                       if (errno == 0)
+                               errno = ENOSPC;
                        fprintf(stderr,
                                  _("%s: could not write %u bytes to WAL file \"%s\": %s\n"),
                                        progname, bytes_to_write, current_walfile_name,