Repair race condition introduced into heap_update() in 7.1

author Tom Lane <tgl@sss.pgh.pa.us>

Wed, 16 May 2001 22:35:12 +0000 (22:35 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Wed, 16 May 2001 22:35:12 +0000 (22:35 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Wed, 16 May 2001 22:35:12 +0000 (22:35 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Wed, 16 May 2001 22:35:12 +0000 (22:35 +0000)
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c

index 7e9b8970204752bc89a9578b5b15f9170f7b37b5..2325a011a1d04a7c69eb6293f3afa1f50479cd32 100644 (file)
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.114 2001/05/12 19:58:27 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.115 2001/05/16 22:35:12 tgl Exp $
   *
   *
   * INTERFACE ROUTINES
@@ -1317,7 +1317,7 @@ heap_insert(Relation relation, HeapTuple tup)
  #endif
  
         /* Find buffer for this tuple */
-       buffer = RelationGetBufferForTuple(relation, tup->t_len);
+       buffer = RelationGetBufferForTuple(relation, tup->t_len, 0);
  
         /* NO ELOG(ERROR) from here till changes are logged */
         START_CRIT_SECTION();
@@ -1578,6 +1578,8 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
                                 newbuf;
         bool            need_toast,
                                 already_marked;
+       Size            newtupsize,
+                               pagefree;
         int                     result;
  
         /* increment access statistics */
@@ -1685,8 +1687,10 @@ l2:
                                   HeapTupleHasExtended(newtup) ||
                                   (MAXALIGN(newtup->t_len) > TOAST_TUPLE_THRESHOLD));
  
-       if (need_toast ||
-               (unsigned) MAXALIGN(newtup->t_len) > PageGetFreeSpace((Page) dp))
+       newtupsize = MAXALIGN(newtup->t_len);
+       pagefree = PageGetFreeSpace((Page) dp);
+
+       if (need_toast || newtupsize > pagefree)
         {
                 _locked_tuple_.node = relation->rd_node;
                 _locked_tuple_.tid = oldtup.t_self;
@@ -1704,17 +1708,60 @@ l2:
  
                 /* Let the toaster do its thing */
                 if (need_toast)
+               {
                         heap_tuple_toast_attrs(relation, newtup, &oldtup);
+                       newtupsize = MAXALIGN(newtup->t_len);
+               }
  
-               /* Now, do we need a new page for the tuple, or not? */
-               if ((unsigned) MAXALIGN(newtup->t_len) <= PageGetFreeSpace((Page) dp))
-                       newbuf = buffer;
+               /*
+                * Now, do we need a new page for the tuple, or not?  This is a bit
+                * tricky since someone else could have added tuples to the page
+                * while we weren't looking.  We have to recheck the available space
+                * after reacquiring the buffer lock.  But don't bother to do that
+                * if the former amount of free space is still not enough; it's
+                * unlikely there's more free now than before.
+                *
+                * What's more, if we need to get a new page, we will need to acquire
+                * buffer locks on both old and new pages.  To avoid deadlock against
+                * some other backend trying to get the same two locks in the other
+                * order, we must be consistent about the order we get the locks in.
+                * We use the rule "lock the higher-numbered page of the relation
+                * first".  To implement this, we must do RelationGetBufferForTuple
+                * while not holding the lock on the old page, and we must tell it
+                * to give us a page beyond the old page.
+                */
+               if (newtupsize > pagefree)
+               {
+                       /* Assume there's no chance to put newtup on same page. */
+                       newbuf = RelationGetBufferForTuple(relation, newtup->t_len,
+                                                                                       BufferGetBlockNumber(buffer) + 1);
+                       /* Now reacquire lock on old tuple's page. */
+                       LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+               }
                 else
-                       newbuf = RelationGetBufferForTuple(relation, newtup->t_len);
-
-               /* Re-acquire the lock on the old tuple's page. */
-               /* this seems to be deadlock free... */
-               LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+               {
+                       /* Re-acquire the lock on the old tuple's page. */
+                       LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+                       /* Re-check using the up-to-date free space */
+                       pagefree = PageGetFreeSpace((Page) dp);
+                       if (newtupsize > pagefree)
+                       {
+                               /*
+                                * Rats, it doesn't fit anymore.  We must now unlock and
+                                * relock to avoid deadlock.  Fortunately, this path should
+                                * seldom be taken.
+                                */
+                               LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+                               newbuf = RelationGetBufferForTuple(relation, newtup->t_len,
+                                                                                       BufferGetBlockNumber(buffer) + 1);
+                               LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+                       }
+                       else
+                       {
+                               /* OK, it fits here, so we're done. */
+                               newbuf = buffer;
+                       }
+               }
         }
         else
         {
@@ -1723,6 +1770,11 @@ l2:
                 newbuf = buffer;
         }
  
+       /*
+        * At this point newbuf and buffer are both pinned and locked,
+        * and newbuf has enough space for the new tuple.
+        */
+
         /* NO ELOG(ERROR) from here till changes are logged */
         START_CRIT_SECTION();
  
diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c

index 7278ecd2deaba2ed027da01fd3e71d54db7dc027..1451dc2ecc5861321dd6cae0df526ea7e9d5ec2d 100644 (file)
--- a/src/backend/access/heap/hio.c
+++ b/src/backend/access/heap/hio.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Id: hio.c,v 1.38 2001/05/12 19:58:27 tgl Exp $
+ *       $Id: hio.c,v 1.39 2001/05/16 22:35:12 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -66,18 +66,25 @@ RelationPutHeapTuple(Relation relation,
  /*
   * RelationGetBufferForTuple
   *
- * Returns exclusive-locked buffer with free space >= given len.
+ *     Returns exclusive-locked buffer with free space >= given len,
+ *     being careful to select only a page at or beyond minblocknum
+ *     in the relation.
   *
- * Note that we use LockPage to lock relation for extension. We can
- * do this as long as in all other places we use page-level locking
- * for indices only. Alternatively, we could define pseudo-table as
- * we do for transactions with XactLockTable.
+ *     The minblocknum parameter is needed to prevent deadlock between
+ *     concurrent heap_update operations; see heap_update for details.
+ *     Pass zero if you don't particularly care which page you get.
   *
- * ELOG(ERROR) is allowed here, so this routine *must* be called
- * before any (unlogged) changes are made in buffer pool.
+ *     Note that we use LockPage to lock relation for extension. We can
+ *     do this as long as in all other places we use page-level locking
+ *     for indices only. Alternatively, we could define pseudo-table as
+ *     we do for transactions with XactLockTable.
+ *
+ *     ELOG(ERROR) is allowed here, so this routine *must* be called
+ *     before any (unlogged) changes are made in buffer pool.
   */
  Buffer
-RelationGetBufferForTuple(Relation relation, Size len)
+RelationGetBufferForTuple(Relation relation, Size len,
+                                                 BlockNumber minblocknum)
  {
         Buffer          buffer = InvalidBuffer;
         Page            pageHeader;
@@ -103,16 +110,19 @@ RelationGetBufferForTuple(Relation relation, Size len)
         if (relation->rd_nblocks > 0)
         {
                 lastblock = relation->rd_nblocks - 1;
-               buffer = ReadBuffer(relation, lastblock);
-               LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
-               pageHeader = (Page) BufferGetPage(buffer);
-               if (len <= PageGetFreeSpace(pageHeader))
-                       return buffer;
-               /*
-                * Doesn't fit, so we'll have to try someplace else.
-                */
-               LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
-               /* buffer release will happen below... */
+               if (lastblock >= minblocknum)
+               {
+                       buffer = ReadBuffer(relation, lastblock);
+                       LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+                       pageHeader = (Page) BufferGetPage(buffer);
+                       if (len <= PageGetFreeSpace(pageHeader))
+                               return buffer;
+                       /*
+                        * Doesn't fit, so we'll have to try someplace else.
+                        */
+                       LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+                       /* buffer release will happen below... */
+               }
         }
  
         /*
@@ -137,32 +147,38 @@ RelationGetBufferForTuple(Relation relation, Size len)
          */
         relation->rd_nblocks = RelationGetNumberOfBlocks(relation);
  
-       if (relation->rd_nblocks > oldnblocks)
+       if ((BlockNumber) relation->rd_nblocks > oldnblocks)
         {
                 /*
                  * Someone else has indeed extended the relation recently.
                  * Try to fit our tuple into the new last page.
                  */
                 lastblock = relation->rd_nblocks - 1;
-               buffer = ReleaseAndReadBuffer(buffer, relation, lastblock, false);
-               LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
-               pageHeader = (Page) BufferGetPage(buffer);
-               if (len <= PageGetFreeSpace(pageHeader))
+               if (lastblock >= minblocknum)
                 {
-                       /* OK, we don't need to extend again. */
-                       if (!relation->rd_myxactonly)
-                               UnlockPage(relation, 0, ExclusiveLock);
-                       return buffer;
+                       buffer = ReleaseAndReadBuffer(buffer, relation, lastblock, false);
+                       LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+                       pageHeader = (Page) BufferGetPage(buffer);
+                       if (len <= PageGetFreeSpace(pageHeader))
+                       {
+                               /* OK, we don't need to extend again. */
+                               if (!relation->rd_myxactonly)
+                                       UnlockPage(relation, 0, ExclusiveLock);
+                               return buffer;
+                       }
+                       /*
+                        * Doesn't fit, so we'll have to extend the relation (again).
+                        */
+                       LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+                       /* buffer release will happen below... */
                 }
-               /*
-                * Doesn't fit, so we'll have to extend the relation (again).
-                */
-               LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
-               /* buffer release will happen below... */
         }
  
         /*
          * Extend the relation by one page and update rd_nblocks for next time.
+        *
+        * Note: at this point minblocknum is ignored; we won't extend by more
+        * than one block...
          */
         lastblock = relation->rd_nblocks;
         buffer = ReleaseAndReadBuffer(buffer, relation, lastblock, true);
diff --git a/src/include/access/hio.h b/src/include/access/hio.h

index 4147645134a399f9588561482f459a6ccea7b6e0..8c50a128a2007a90efe0ed05db4b9452147e1b41 100644 (file)
--- a/src/include/access/hio.h
+++ b/src/include/access/hio.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: hio.h,v 1.17 2001/01/24 19:43:19 momjian Exp $
+ * $Id: hio.h,v 1.18 2001/05/16 22:35:12 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -18,6 +18,7 @@
  
  extern void RelationPutHeapTuple(Relation relation, Buffer buffer,
                                          HeapTuple tuple);
-extern Buffer RelationGetBufferForTuple(Relation relation, Size len);
+extern Buffer RelationGetBufferForTuple(Relation relation, Size len,
+                                                                               BlockNumber minblocknum);
  
  #endif  /* HIO_H */
author	Tom Lane <tgl@sss.pgh.pa.us>
	Wed, 16 May 2001 22:35:12 +0000 (22:35 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Wed, 16 May 2001 22:35:12 +0000 (22:35 +0000)
src/backend/access/heap/heapam.c		patch | blob | history
src/backend/access/heap/hio.c		patch | blob | history
src/include/access/hio.h		patch | blob | history