]> granicus.if.org Git - postgresql/commitdiff
Improve handling of corrupted two-phase state files at recovery
authorMichael Paquier <michael@paquier.xyz>
Fri, 7 Sep 2018 18:00:16 +0000 (11:00 -0700)
committerMichael Paquier <michael@paquier.xyz>
Fri, 7 Sep 2018 18:00:16 +0000 (11:00 -0700)
When a corrupted two-phase state file is found by WAL replay, be it for
crash recovery or archive recovery, then the file is simply skipped and
a WARNING is logged to the user, causing the transaction to be silently
lost.  Facing an on-disk WAL file which is corrupted is as likely to
happen as what is stored in WAL records, but WAL records are already
able to fail hard if there is a CRC mismatch.  On-disk two-phase state
files, on the contrary, are simply ignored if corrupted.  Note that when
restoring the initial two-phase data state at recovery, files newer than
the horizon XID are discarded hence no files present in pg_twophase/
should be torned and have been made durable by a previous checkpoint, so
recovery should never see any corrupted two-phase state file by design.

The situation got better since 978b2f6 which has added two-phase state
information directly in WAL instead of using on-disk files, so the risk
is limited to two-phase transactions which live across at least one
checkpoint for long periods.  Backups having legit two-phase state files
on-disk could also lose silently transactions when restored if things
get corrupted.

This behavior exists since two-phase commit has been introduced, no
back-patch is done for now per the lack of complaints about this
problem.

Author: Michael Paquier
Discussion: https://postgr.es/m/20180709050309.GM1467@paquier.xyz

src/backend/access/transam/twophase.c

index 4aff6cf7f278965d73019f7cf9ddb3ea813994ee..c06bbe72646c1896f8db7b715dd0c03c30fedc3e 100644 (file)
@@ -1207,10 +1207,12 @@ RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info,
  * Read and validate the state file for xid.
  *
  * If it looks OK (has a valid magic number and CRC), return the palloc'd
- * contents of the file.  Otherwise return NULL.
+ * contents of the file, issuing an error when finding corrupted data.  If
+ * missing_ok is true, which indicates that missing files can be safely
+ * ignored, then return NULL.  This state can be reached when doing recovery.
  */
 static char *
-ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
+ReadTwoPhaseFile(TransactionId xid, bool missing_ok)
 {
        char            path[MAXPGPATH];
        char       *buf;
@@ -1227,11 +1229,12 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
        fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
        if (fd < 0)
        {
-               if (give_warnings)
-                       ereport(WARNING,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not open file \"%s\": %m", path)));
-               return NULL;
+               if (missing_ok && errno == ENOENT)
+                       return NULL;
+
+               ereport(ERROR,
+                               (errcode_for_file_access(),
+                                errmsg("could not open file \"%s\": %m", path)));
        }
 
        /*
@@ -1241,35 +1244,27 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
         * even on a valid file.
         */
        if (fstat(fd, &stat))
-       {
-               int                     save_errno = errno;
-
-               CloseTransientFile(fd);
-               if (give_warnings)
-               {
-                       errno = save_errno;
-                       ereport(WARNING,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not stat file \"%s\": %m", path)));
-               }
-               return NULL;
-       }
+               ereport(ERROR,
+                               (errcode_for_file_access(),
+                                errmsg("could not stat file \"%s\": %m", path)));
 
        if (stat.st_size < (MAXALIGN(sizeof(TwoPhaseFileHeader)) +
                                                MAXALIGN(sizeof(TwoPhaseRecordOnDisk)) +
                                                sizeof(pg_crc32c)) ||
                stat.st_size > MaxAllocSize)
-       {
-               CloseTransientFile(fd);
-               return NULL;
-       }
+               ereport(ERROR,
+                               (errcode(ERRCODE_DATA_CORRUPTED),
+                                errmsg_plural("incorrect size of file \"%s\": %zu byte",
+                                                          "incorrect size of file \"%s\": %zu bytes",
+                                                          (Size) stat.st_size, path,
+                                                          (Size) stat.st_size)));
 
        crc_offset = stat.st_size - sizeof(pg_crc32c);
        if (crc_offset != MAXALIGN(crc_offset))
-       {
-               CloseTransientFile(fd);
-               return NULL;
-       }
+               ereport(ERROR,
+                               (errcode(ERRCODE_DATA_CORRUPTED),
+                                errmsg("incorrect alignment of CRC offset for file \"%s\"",
+                                               path)));
 
        /*
         * OK, slurp in the file.
@@ -1280,37 +1275,31 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
        r = read(fd, buf, stat.st_size);
        if (r != stat.st_size)
        {
-               int                     save_errno = errno;
-
-               pgstat_report_wait_end();
-               CloseTransientFile(fd);
-               if (give_warnings)
-               {
-                       if (r < 0)
-                       {
-                               errno = save_errno;
-                               ereport(WARNING,
-                                               (errcode_for_file_access(),
-                                                errmsg("could not read file \"%s\": %m", path)));
-                       }
-                       else
-                               ereport(WARNING,
-                                               (errmsg("could not read file \"%s\": read %d of %zu",
-                                                               path, r, (Size) stat.st_size)));
-               }
-               pfree(buf);
-               return NULL;
+               if (r < 0)
+                       ereport(ERROR,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not read file \"%s\": %m", path)));
+               else
+                       ereport(ERROR,
+                                       (errmsg("could not read file \"%s\": read %d of %zu",
+                                                       path, r, (Size) stat.st_size)));
        }
 
        pgstat_report_wait_end();
        CloseTransientFile(fd);
 
        hdr = (TwoPhaseFileHeader *) buf;
-       if (hdr->magic != TWOPHASE_MAGIC || hdr->total_len != stat.st_size)
-       {
-               pfree(buf);
-               return NULL;
-       }
+       if (hdr->magic != TWOPHASE_MAGIC)
+               ereport(ERROR,
+                               (errcode(ERRCODE_DATA_CORRUPTED),
+                                errmsg("invalid magic number stored in file \"%s\"",
+                                               path)));
+
+       if (hdr->total_len != stat.st_size)
+               ereport(ERROR,
+                               (errcode(ERRCODE_DATA_CORRUPTED),
+                                errmsg("invalid size stored in file \"%s\"",
+                                               path)));
 
        INIT_CRC32C(calc_crc);
        COMP_CRC32C(calc_crc, buf, crc_offset);
@@ -1319,10 +1308,10 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
        file_crc = *((pg_crc32c *) (buf + crc_offset));
 
        if (!EQ_CRC32C(calc_crc, file_crc))
-       {
-               pfree(buf);
-               return NULL;
-       }
+               ereport(ERROR,
+                               (errcode(ERRCODE_DATA_CORRUPTED),
+                                errmsg("calculated CRC checksum does not match value stored in file \"%s\"",
+                                               path)));
 
        return buf;
 }
@@ -1431,7 +1420,7 @@ StandbyTransactionIdIsPrepared(TransactionId xid)
                return false;                   /* nothing to do */
 
        /* Read and validate file */
-       buf = ReadTwoPhaseFile(xid, false);
+       buf = ReadTwoPhaseFile(xid, true);
        if (buf == NULL)
                return false;
 
@@ -1479,7 +1468,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
         * to disk if for some reason they have lived for a long time.
         */
        if (gxact->ondisk)
-               buf = ReadTwoPhaseFile(xid, true);
+               buf = ReadTwoPhaseFile(xid, false);
        else
                XlogReadTwoPhaseData(gxact->prepare_start_lsn, &buf, NULL);
 
@@ -1874,6 +1863,10 @@ restoreTwoPhaseData(void)
  * write a WAL entry, and so there might be no evidence in WAL of those
  * subxact XIDs.
  *
+ * On corrupted two-phase files, fail immediately.  Keeping around broken
+ * entries and let replay continue causes harm on the system, and a new
+ * backup should be rolled in.
+ *
  * Our other responsibility is to determine and return the oldest valid XID
  * among the prepared xacts (if none, return ShmemVariableCache->nextXid).
  * This is needed to synchronize pg_subtrans startup properly.
@@ -2164,15 +2157,7 @@ ProcessTwoPhaseBuffer(TransactionId xid,
        if (fromdisk)
        {
                /* Read and validate file */
-               buf = ReadTwoPhaseFile(xid, true);
-               if (buf == NULL)
-               {
-                       ereport(WARNING,
-                                       (errmsg("removing corrupt two-phase state file for transaction %u",
-                                                       xid)));
-                       RemoveTwoPhaseFile(xid, true);
-                       return NULL;
-               }
+               buf = ReadTwoPhaseFile(xid, false);
        }
        else
        {
@@ -2185,21 +2170,15 @@ ProcessTwoPhaseBuffer(TransactionId xid,
        if (!TransactionIdEquals(hdr->xid, xid))
        {
                if (fromdisk)
-               {
-                       ereport(WARNING,
-                                       (errmsg("removing corrupt two-phase state file for transaction %u",
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_DATA_CORRUPTED),
+                                        errmsg("corrupted two-phase state file for transaction \"%u\"",
                                                        xid)));
-                       RemoveTwoPhaseFile(xid, true);
-               }
                else
-               {
-                       ereport(WARNING,
-                                       (errmsg("removing corrupt two-phase state from memory for transaction %u",
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_DATA_CORRUPTED),
+                                        errmsg("corrupted two-phase state in memory for transaction \"%u\"",
                                                        xid)));
-                       PrepareRedoRemove(xid, true);
-               }
-               pfree(buf);
-               return NULL;
        }
 
        /*