granicus.if.org Git - postgresql/commitdiff
Fix failure to delete spill files of aborted transactions
author: Alvaro Herrera <alvherre@alvh.no-ip.org>
Fri, 5 Jan 2018 15:17:10 +0000 (12:17 -0300)
committer: Alvaro Herrera <alvherre@alvh.no-ip.org>
Fri, 5 Jan 2018 15:17:10 +0000 (12:17 -0300)
Logical decoding's reorderbuffer.c may spill transaction files to disk
when transactions are large.  These are supposed to be removed when they
become "too old" by xid; but file removal requires the boundary LSNs of
the transaction to be known.  The final_lsn is only set when we see the
commit or abort record for the transaction, but nothing sets the value
for transactions that crash, so the removal code misbehaves -- in
assertion-enabled builds, it crashes with a failed assertion.

To fix, modify the final_lsn of transactions that don't have a value
set, to the LSN of the very latest change in the transaction.  This
causes the spilled files to be removed appropriately.

Author: Atsushi Torikoshi
Reviewed-by: Kyotaro HORIGUCHI, Craig Ringer, Masahiko Sawada
Discussion: https://postgr.es/m/54e4e488-186b-a056-6628-50628e4e4ebc@lab.ntt.co.jp

src/backend/replication/logical/reorderbuffer.c
src/include/replication/reorderbuffer.h

index 81bab8cf1426125594bf75d126d647378eb824a2..7c8f02234bd1a65bef46bd437ae08c3d5e039148 100644 (file)
@@ -1757,8 +1757,8 @@ ReorderBufferAbortOld(ReorderBuffer *rb, TransactionId oldestRunningXid)
         * Iterate through all (potential) toplevel TXNs and abort all that are
         * older than what possibly can be running. Once we've found the first
         * that is alive we stop, there might be some that acquired an xid earlier
-        * but started writing later, but it's unlikely and they will cleaned up
-        * in a later call to ReorderBufferAbortOld().
+        * but started writing later, but it's unlikely and they will be cleaned
+        * up in a later call to this function.
         */
        dlist_foreach_modify(it, &rb->toplevel_by_lsn)
        {
@@ -1768,6 +1768,21 @@ ReorderBufferAbortOld(ReorderBuffer *rb, TransactionId oldestRunningXid)
 
                if (TransactionIdPrecedes(txn->xid, oldestRunningXid))
                {
+                       /*
+                        * We set final_lsn on a transaction when we decode its commit or
+                        * abort record, but we never see those records for crashed
+                        * transactions.  To ensure cleanup of these transactions, set
+                        * final_lsn to that of their last change; this causes
+                        * ReorderBufferRestoreCleanup to do the right thing.
+                        */
+                       if (txn->serialized && txn->final_lsn == 0)
+                       {
+                               ReorderBufferChange *last =
+                                       dlist_tail_element(ReorderBufferChange, node, &txn->changes);
+
+                               txn->final_lsn = last->lsn;
+                       }
+
                        elog(DEBUG2, "aborting old transaction %u", txn->xid);
 
                        /* remove potential on-disk data, and deallocate this tx */
index a7ebf29382265a39a675023600f357c844896825..d30ee936a0a2c00b7587fd9841db4940d781d8e0 100644 (file)
@@ -168,6 +168,8 @@ typedef struct ReorderBufferTXN
         * * plain abort record
         * * prepared transaction abort
         * * error during decoding
+        * * for a crashed transaction, the LSN of the last change, regardless of
+        *   what it was.
         * ----
         */
        XLogRecPtr      final_lsn;