]> granicus.if.org Git - postgresql/commitdiff
Fix concurrent locking of tuple update chain
authorAlvaro Herrera <alvherre@alvh.no-ip.org>
Wed, 26 Jul 2017 21:24:16 +0000 (17:24 -0400)
committerAlvaro Herrera <alvherre@alvh.no-ip.org>
Wed, 26 Jul 2017 21:24:16 +0000 (17:24 -0400)
If several sessions are concurrently locking a tuple update chain with
nonconflicting lock modes using an old snapshot, and they all succeed,
it may happen that some of them fail because of restarting the loop (due
to a concurrent Xmax change) and getting an error in the subsequent pass
while trying to obtain a tuple lock that they already have in some tuple
version.

This can only happen with very high concurrency (where a row is being
both updated and FK-checked by multiple transactions concurrently), but
it's been observed in the field and can have unpleasant consequences
such as an FK check failing to see a tuple that definitely exists:
    ERROR:  insert or update on table "child_table" violates foreign key constraint "fk_constraint_name"
    DETAIL:  Key (keyid)=(123456) is not present in table "parent_table".
(where the key is observably present in the table).

Discussion: https://postgr.es/m/20170714210011.r25mrff4nxjhmf3g@alvherre.pgsql

src/backend/access/heap/heapam.c

index 011f2b9c54274d45d42a308c74afe8e8426c3a50..479c3843f01036a3618540ff57bf9a1d414afadd 100644 (file)
@@ -5524,8 +5524,10 @@ l5:
  * with the given xid, does the current transaction need to wait, fail, or can
  * it continue if it wanted to acquire a lock of the given mode?  "needwait"
  * is set to true if waiting is necessary; if it can continue, then
- * HeapTupleMayBeUpdated is returned.  In case of a conflict, a different
- * HeapTupleSatisfiesUpdate return code is returned.
+ * HeapTupleMayBeUpdated is returned.  If the lock is already held by the
+ * current transaction, return HeapTupleSelfUpdated.  In case of a conflict
+ * with another transaction, a different HeapTupleSatisfiesUpdate return code
+ * is returned.
  *
  * The held status is said to be hypothetical because it might correspond to a
  * lock held by a single Xid, i.e. not a real MultiXactId; we express it this
@@ -5548,8 +5550,9 @@ test_lockmode_for_conflict(MultiXactStatus status, TransactionId xid,
        if (TransactionIdIsCurrentTransactionId(xid))
        {
                /*
-                * Updated by our own transaction? Just return failure.  This
-                * shouldn't normally happen.
+                * The tuple has already been locked by our own transaction.  This is
+                * very rare but can happen if multiple transactions are trying to
+                * lock an ancient version of the same tuple.
                 */
                return HeapTupleSelfUpdated;
        }
@@ -5748,6 +5751,22 @@ l4:
                                                                                                                members[i].xid,
                                                                                                                mode, &needwait);
 
+                                       /*
+                                        * If the tuple was already locked by ourselves in a
+                                        * previous iteration of this (say heap_lock_tuple was
+                                        * forced to restart the locking loop because of a change
+                                        * in xmax), then we hold the lock already on this tuple
+                                        * version and we don't need to do anything; and this is
+                                        * not an error condition either.  We just need to skip
+                                        * this tuple and continue locking the next version in the
+                                        * update chain.
+                                        */
+                                       if (result == HeapTupleSelfUpdated)
+                                       {
+                                               pfree(members);
+                                               goto next;
+                                       }
+
                                        if (needwait)
                                        {
                                                LockBuffer(buf, BUFFER_LOCK_UNLOCK);
@@ -5808,6 +5827,19 @@ l4:
 
                                result = test_lockmode_for_conflict(status, rawxmax, mode,
                                                                                                        &needwait);
+
+                               /*
+                                * If the tuple was already locked by ourselves in a previous
+                                * iteration of this (say heap_lock_tuple was forced to
+                                * restart the locking loop because of a change in xmax), then
+                                * we hold the lock already on this tuple version and we don't
+                                * need to do anything; and this is not an error condition
+                                * either.  We just need to skip this tuple and continue
+                                * locking the next version in the update chain.
+                                */
+                               if (result == HeapTupleSelfUpdated)
+                                       goto next;
+
                                if (needwait)
                                {
                                        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
@@ -5868,6 +5900,7 @@ l4:
 
                END_CRIT_SECTION();
 
+next:
                /* if we find the end of update chain, we're done. */
                if (mytup.t_data->t_infomask & HEAP_XMAX_INVALID ||
                        ItemPointerEquals(&mytup.t_self, &mytup.t_data->t_ctid) ||